The unified diff between revisions [29392f11..] and [be264b7e..] is displayed below. It can also be downloaded as a raw diff.
#
#
# add_file "bn.ilg"
# content [c43c9b999b935cc07f1e830cae687ab5da0f4347]
#
# add_file "bn.ind"
# content [d3672deebf8890700d73790142827796d5f56175]
#
# add_file "bn.pdf"
# content [923c23aab5777eba26e14b0deb1a36d789a395cd]
#
# add_file "bn.tex"
# content [70b4a19f7e56d97b65862b33a7098d919ae8d771]
#
# add_file "poster.out"
# content [da39a3ee5e6b4b0d3255bfef95601890afd80709]
#
# add_file "poster.pdf"
# content [1143d49990d87bcd05f20e771bcd0a25495ce771]
#
# add_file "poster.tex"
# content [1fe3768a9d3d68873cd1b468f34e6b0474ac29dc]
#
# add_file "tommath.out"
# content [8b1ec2f5e8f380908d72001636ea44581d64bf76]
#
# add_file "tommath.pdf"
# content [9be27d0ab14ad51f364bca4f66b8343bd23d8643]
#
# add_file "tommath.src"
# content [5d6ecf084c52bb30161a65e0db3f71b3a0fb4882]
#
# add_file "tommath.tex"
# content [783ba41e4c5f114960672d30915a5446715c80fc]
#
============================================================
--- bn.ilg c43c9b999b935cc07f1e830cae687ab5da0f4347
+++ bn.ilg c43c9b999b935cc07f1e830cae687ab5da0f4347
@@ -0,0 +1,6 @@
+This is makeindex, version 2.14 [02-Oct-2002] (kpathsea + Thai support).
+Scanning input file bn.idx....done (79 entries accepted, 0 rejected).
+Sorting entries....done (511 comparisons).
+Generating output file bn.ind....done (82 lines written, 0 warnings).
+Output written in bn.ind.
+Transcript written in bn.ilg.
============================================================
--- bn.ind d3672deebf8890700d73790142827796d5f56175
+++ bn.ind d3672deebf8890700d73790142827796d5f56175
@@ -0,0 +1,82 @@
+\begin{theindex}
+
+ \item mp\_add, \hyperpage{25}
+ \item mp\_add\_d, \hyperpage{48}
+ \item mp\_and, \hyperpage{25}
+ \item mp\_clear, \hyperpage{7}
+ \item mp\_clear\_multi, \hyperpage{8}
+ \item mp\_cmp, \hyperpage{20}
+ \item mp\_cmp\_d, \hyperpage{21}
+ \item mp\_cmp\_mag, \hyperpage{19}
+ \item mp\_div, \hyperpage{26}
+ \item mp\_div\_2, \hyperpage{22}
+ \item mp\_div\_2d, \hyperpage{24}
+ \item mp\_div\_d, \hyperpage{48}
+ \item mp\_dr\_reduce, \hyperpage{36}
+ \item mp\_dr\_setup, \hyperpage{36}
+ \item MP\_EQ, \hyperpage{18}
+ \item mp\_error\_to\_string, \hyperpage{6}
+ \item mp\_expt\_d, \hyperpage{39}
+ \item mp\_exptmod, \hyperpage{39}
+ \item mp\_exteuclid, \hyperpage{47}
+ \item mp\_gcd, \hyperpage{47}
+ \item mp\_get\_int, \hyperpage{16}
+ \item mp\_grow, \hyperpage{12}
+ \item MP\_GT, \hyperpage{18}
+ \item mp\_init, \hyperpage{7}
+ \item mp\_init\_copy, \hyperpage{9}
+ \item mp\_init\_multi, \hyperpage{8}
+ \item mp\_init\_set, \hyperpage{17}
+ \item mp\_init\_set\_int, \hyperpage{17}
+ \item mp\_init\_size, \hyperpage{10}
+ \item mp\_int, \hyperpage{6}
+ \item mp\_invmod, \hyperpage{48}
+ \item mp\_jacobi, \hyperpage{48}
+ \item mp\_lcm, \hyperpage{47}
+ \item mp\_lshd, \hyperpage{24}
+ \item MP\_LT, \hyperpage{18}
+ \item MP\_MEM, \hyperpage{5}
+ \item mp\_mod, \hyperpage{31}
+ \item mp\_mod\_d, \hyperpage{48}
+ \item mp\_montgomery\_calc\_normalization, \hyperpage{34}
+ \item mp\_montgomery\_reduce, \hyperpage{33}
+ \item mp\_montgomery\_setup, \hyperpage{33}
+ \item mp\_mul, \hyperpage{27}
+ \item mp\_mul\_2, \hyperpage{22}
+ \item mp\_mul\_2d, \hyperpage{24}
+ \item mp\_mul\_d, \hyperpage{48}
+ \item mp\_n\_root, \hyperpage{40}
+ \item mp\_neg, \hyperpage{25}
+ \item MP\_NO, \hyperpage{5}
+ \item MP\_OKAY, \hyperpage{5}
+ \item mp\_or, \hyperpage{25}
+ \item mp\_prime\_fermat, \hyperpage{41}
+ \item mp\_prime\_is\_divisible, \hyperpage{41}
+ \item mp\_prime\_is\_prime, \hyperpage{42}
+ \item mp\_prime\_miller\_rabin, \hyperpage{41}
+ \item mp\_prime\_next\_prime, \hyperpage{42}
+ \item mp\_prime\_rabin\_miller\_trials, \hyperpage{42}
+ \item mp\_prime\_random, \hyperpage{43}
+ \item mp\_prime\_random\_ex, \hyperpage{43}
+ \item mp\_radix\_size, \hyperpage{45}
+ \item mp\_read\_radix, \hyperpage{45}
+ \item mp\_read\_unsigned\_bin, \hyperpage{46}
+ \item mp\_reduce, \hyperpage{32}
+ \item mp\_reduce\_2k, \hyperpage{37}
+ \item mp\_reduce\_2k\_setup, \hyperpage{37}
+ \item mp\_reduce\_setup, \hyperpage{32}
+ \item mp\_rshd, \hyperpage{24}
+ \item mp\_set, \hyperpage{15}
+ \item mp\_set\_int, \hyperpage{16}
+ \item mp\_shrink, \hyperpage{11}
+ \item mp\_sqr, \hyperpage{29}
+ \item mp\_sub, \hyperpage{25}
+ \item mp\_sub\_d, \hyperpage{48}
+ \item mp\_to\_unsigned\_bin, \hyperpage{46}
+ \item mp\_toradix, \hyperpage{45}
+ \item mp\_unsigned\_bin\_size, \hyperpage{46}
+ \item MP\_VAL, \hyperpage{5}
+ \item mp\_xor, \hyperpage{25}
+ \item MP\_YES, \hyperpage{5}
+
+\end{theindex}
============================================================
# bn.pdf is binary
============================================================
--- bn.tex 70b4a19f7e56d97b65862b33a7098d919ae8d771
+++ bn.tex 70b4a19f7e56d97b65862b33a7098d919ae8d771
@@ -0,0 +1,1733 @@
+\documentclass[b5paper]{book}
+\usepackage{hyperref}
+\usepackage{makeidx}
+\usepackage{amssymb}
+\usepackage{color}
+\usepackage{alltt}
+\usepackage{graphicx}
+\usepackage{layout}
+\def\union{\cup}
+\def\intersect{\cap}
+\def\getsrandom{\stackrel{\rm R}{\gets}}
+\def\cross{\times}
+\def\cat{\hspace{0.5em} \| \hspace{0.5em}}
+\def\catn{$\|$}
+\def\divides{\hspace{0.3em} | \hspace{0.3em}}
+\def\nequiv{\not\equiv}
+\def\approx{\raisebox{0.2ex}{\mbox{\small $\sim$}}}
+\def\lcm{{\rm lcm}}
+\def\gcd{{\rm gcd}}
+\def\log{{\rm log}}
+\def\ord{{\rm ord}}
+\def\abs{{\mathit abs}}
+\def\rep{{\mathit rep}}
+\def\mod{{\mathit\ mod\ }}
+\renewcommand{\pmod}[1]{\ ({\rm mod\ }{#1})}
+\newcommand{\floor}[1]{\left\lfloor{#1}\right\rfloor}
+\newcommand{\ceil}[1]{\left\lceil{#1}\right\rceil}
+\def\Or{{\rm\ or\ }}
+\def\And{{\rm\ and\ }}
+\def\iff{\hspace{1em}\Longleftrightarrow\hspace{1em}}
+\def\implies{\Rightarrow}
+\def\undefined{{\rm ``undefined"}}
+\def\Proof{\vspace{1ex}\noindent {\bf Proof:}\hspace{1em}}
+\let\oldphi\phi
+\def\phi{\varphi}
+\def\Pr{{\rm Pr}}
+\newcommand{\str}[1]{{\mathbf{#1}}}
+\def\F{{\mathbb F}}
+\def\N{{\mathbb N}}
+\def\Z{{\mathbb Z}}
+\def\R{{\mathbb R}}
+\def\C{{\mathbb C}}
+\def\Q{{\mathbb Q}}
+\definecolor{DGray}{gray}{0.5}
+\newcommand{\emailaddr}[1]{\mbox{$<${#1}$>$}}
+\def\twiddle{\raisebox{0.3ex}{\mbox{\tiny $\sim$}}}
+\def\gap{\vspace{0.5ex}}
+\makeindex
+\begin{document}
+\frontmatter
+\pagestyle{empty}
+\title{LibTomMath User Manual \\ v0.30}
+\author{Tom St Denis \\ tomstdenis@iahu.ca}
+\maketitle
+This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been
+formatted for B5 [176x250] paper using the \LaTeX{} {\em book} macro package.
+
+\vspace{10cm}
+
+\begin{flushright}Open Source. Open Academia. Open Minds.
+
+\mbox{ }
+
+Tom St Denis,
+
+Ontario, Canada
+\end{flushright}
+
+\tableofcontents
+\listoffigures
+\mainmatter
+\pagestyle{headings}
+\chapter{Introduction}
+\section{What is LibTomMath?}
+LibTomMath is a library of source code which provides a series of efficient and carefully written functions for manipulating
+large integer numbers. It was written in portable ISO C source code so that it will build on any platform with a conforming
+C compiler.
+
+In a nutshell the library was written from scratch with verbose comments to help instruct computer science students how
+to implement ``bignum'' math. However, the resulting code has proven to be very useful. It has been used by numerous
+universities, commercial and open source software developers. It has been used on a variety of platforms ranging from
+Linux and Windows based x86 to ARM based Gameboys and PPC based MacOS machines.
+
+\section{License}
+As of the v0.25 the library source code has been placed in the public domain with every new release. As of the v0.28
+release the textbook ``Implementing Multiple Precision Arithmetic'' has been placed in the public domain with every new
+release as well. This textbook is meant to complement the project by providing a more solid walkthrough of the development
+algorithms used in the library.
+
+Since both\footnote{Note that the MPI files under mtest/ are copyrighted by Michael Fromberger. They are not required to use LibTomMath.} are in the
+public domain everyone is entitled to do with them as they see fit.
+
+\section{Building LibTomMath}
+
+LibTomMath is meant to be very ``GCC friendly'' as it comes with a makefile well suited for GCC. However, the library will
+also build in MSVC and Borland C out of the box. For any other ISO C compiler a makefile will have to be made by the end
+developer.
+
+To build the library for GCC simply issue the
+
+\begin{alltt}
+make
+\end{alltt}
+
+command. This will build the library and archive the object files in ``libtommath.a''. Now you simply link against that
+and include ``tommath.h'' within your programs.
+
+Alternatively to build with MSVC type
+
+\begin{alltt}
+nmake -f makefile.msvc
+\end{alltt}
+
+This will build the library and archive the object files in ``tommath.lib''. This has been tested with MSVC version 6.00
+with service pack 5.
+
+There is limited support for making a ``DLL'' in windows via the ``makefile.cygwin\_dll'' makefile. It requires Cygwin
+to work with since it requires the auto-export/import functionality. The resulting DLL and import library ``libtommath.dll.a''
+can be used to link LibTomMath dynamically to any Windows program using Cygwin.
+
+\subsection{Testing}
+To build the library and the test harness type
+
+\begin{alltt}
+make test
+\end{alltt}
+
+This will build the library, ``test'' and ``mtest/mtest''. The ``test'' program will accept test vectors and verify the
+results. ``mtest/mtest'' will generate test vectors using the MPI library by Michael Fromberger\footnote{A copy of MPI
+is included in the package}. Simply pipe mtest into test using
+
+\begin{alltt}
+mtest/mtest | test
+\end{alltt}
+
+If you do not have a ``/dev/urandom'' style RNG source you will have to write your own PRNG and simply pipe that into
+mtest. For example, if your PRNG program is called ``myprng'' simply invoke
+
+\begin{alltt}
+myprng | mtest/mtest | test
+\end{alltt}
+
+This will output a row of numbers that are increasing. Each column is a different test (such as addition, multiplication, etc)
+that is being performed. The numbers represent how many times the test was invoked. If an error is detected the program
+will exit with a dump of the relevant numbers it was working with.
+
+\section{Purpose of LibTomMath}
+Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath was not written with
+bleeding edge performance in mind. First and foremost LibTomMath was written to be entirely open. Not only is the
+source code public domain (unlike various other GPL/etc licensed code), not only is the code freely downloadable but the
+source code is also accessible for computer science students attempting to learn ``BigNum'' or multiple precision
+arithmetic techniques.
+
+LibTomMath was written to be an instructive collection of source code. This is why there are many comments, only one
+function per source file and often I use a ``middle-road'' approach where I don't cut corners for an extra 2\% speed
+increase.
+
+Source code alone cannot really teach how the algorithms work which is why I also wrote a textbook that accompanies
+the library (beat that!).
+
+So you may be thinking ``should I use LibTomMath?'' and the answer is a definite maybe. Let me tabulate what I think
+are the pros and cons of LibTomMath by comparing it to the math routines from GnuPG\footnote{GnuPG v1.2.3 versus LibTomMath v0.28}.
+
+\newpage\begin{figure}[here]
+\begin{small}
+\begin{center}
+\begin{tabular}{|l|c|c|l|}
+\hline \textbf{Criteria} & \textbf{Pro} & \textbf{Con} & \textbf{Notes} \\
+\hline Few lines of code per file & X & & GnuPG $ = 300.9$, LibTomMath $ = 76.04$ \\
+\hline Commented function prototypes & X && GnuPG function names are cryptic. \\
+\hline Speed && X & LibTomMath is slower. \\
+\hline Totally free & X & & GPL has unfavourable restrictions.\\
+\hline Large function base & X & & GnuPG is barebones. \\
+\hline Four modular reduction algorithms & X & & Faster modular exponentiation. \\
+\hline Portable & X & & GnuPG requires configuration to build. \\
+\hline
+\end{tabular}
+\end{center}
+\end{small}
+\caption{LibTomMath Valuation}
+\end{figure}
+
+It may seem odd to compare LibTomMath to GnuPG since the math in GnuPG is only a small portion of the entire application.
+However, LibTomMath was written with cryptography in mind. It provides essentially all of the functions a cryptosystem
+would require when working with large integers.
+
+So it may feel tempting to just rip the math code out of GnuPG (or GnuMP where it was taken from originally) in your
+own application but I think there are reasons not to. While LibTomMath is slower than libraries such as GnuMP it is
+not normally significantly slower. On x86 machines the difference is normally a factor of two when performing modular
+exponentiations.
+
+Essentially the only time you wouldn't use LibTomMath is when blazing speed is the primary concern.
+
+\chapter{Getting Started with LibTomMath}
+\section{Building Programs}
+In order to use LibTomMath you must include ``tommath.h'' and link against the appropriate library file (typically
+libtommath.a). There is no library initialization required and the entire library is thread safe.
+
+\section{Return Codes}
+There are three possible return codes a function may return.
+
+\index{MP\_OKAY}\index{MP\_YES}\index{MP\_NO}\index{MP\_VAL}\index{MP\_MEM}
+\begin{figure}[here!]
+\begin{center}
+\begin{small}
+\begin{tabular}{|l|l|}
+\hline \textbf{Code} & \textbf{Meaning} \\
+\hline MP\_OKAY & The function succeeded. \\
+\hline MP\_VAL & The function input was invalid. \\
+\hline MP\_MEM & Heap memory exhausted. \\
+\hline &\\
+\hline MP\_YES & Response is yes. \\
+\hline MP\_NO & Response is no. \\
+\hline
+\end{tabular}
+\end{small}
+\end{center}
+\caption{Return Codes}
+\end{figure}
+
+The last two codes listed are not actually ``return'ed'' by a function. They are placed in an integer (the caller must
+provide the address of an integer it can store to) which the caller can access. To convert one of the three return codes
+to a string use the following function.
+
+\index{mp\_error\_to\_string}
+\begin{alltt}
+char *mp_error_to_string(int code);
+\end{alltt}
+
+This will return a pointer to a string which describes the given error code. It will not work for the return codes
+MP\_YES and MP\_NO.
+
+\section{Data Types}
+The basic ``multiple precision integer'' type is known as the ``mp\_int'' within LibTomMath. This data type is used to
+organize all of the data required to manipulate the integer it represents. Within LibTomMath it has been prototyped
+as the following.
+
+\index{mp\_int}
+\begin{alltt}
+typedef struct \{
+ int used, alloc, sign;
+ mp_digit *dp;
+\} mp_int;
+\end{alltt}
+
+Where ``mp\_digit'' is a data type that represents individual digits of the integer. By default, an mp\_digit is the
+ISO C ``unsigned long'' data type and each digit is $28-$bits long. The mp\_digit type can be configured to suit other
+platforms by defining the appropriate macros.
+
+All LTM functions that use the mp\_int type will expect a pointer to mp\_int structure. You must allocate memory to
+hold the structure itself by yourself (whether off stack or heap it doesn't matter). The very first thing that must be
+done to use an mp\_int is that it must be initialized.
+
+\section{Function Organization}
+
+The arithmetic functions of the library are all organized to have the same style prototype. That is source operands
+are passed on the left and the destination is on the right. For instance,
+
+\begin{alltt}
+mp_add(&a, &b, &c); /* c = a + b */
+mp_mul(&a, &a, &c); /* c = a * a */
+mp_div(&a, &b, &c, &d); /* c = [a/b], d = a mod b */
+\end{alltt}
+
+Another feature of the way the functions have been implemented is that source operands can be destination operands as well.
+For instance,
+
+\begin{alltt}
+mp_add(&a, &b, &b); /* b = a + b */
+mp_div(&a, &b, &a, &c); /* a = [a/b], c = a mod b */
+\end{alltt}
+
+This allows operands to be re-used which can make programming simpler.
+
+\section{Initialization}
+\subsection{Single Initialization}
+A single mp\_int can be initialized with the ``mp\_init'' function.
+
+\index{mp\_init}
+\begin{alltt}
+int mp_init (mp_int * a);
+\end{alltt}
+
+This function expects a pointer to an mp\_int structure and will initialize the members of the structure so the mp\_int
+represents the default integer which is zero. If the function returns MP\_OKAY then the mp\_int is ready to be used
+by the other LibTomMath functions.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ if ((result = mp_init(&number)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* use the number */
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+\subsection{Single Free}
+When you are finished with an mp\_int it is ideal to return the heap it used back to the system. The following function
+provides this functionality.
+
+\index{mp\_clear}
+\begin{alltt}
+void mp_clear (mp_int * a);
+\end{alltt}
+
+The function expects a pointer to a previously initialized mp\_int structure and frees the heap it uses. It sets the
+pointer\footnote{The ``dp'' member.} within the mp\_int to \textbf{NULL} which is used to prevent double free situations.
+It is legal to call mp\_clear() twice on the same mp\_int in a row.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ if ((result = mp_init(&number)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* use the number */
+
+ /* We're done with it. */
+ mp_clear(&number);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+\subsection{Multiple Initializations}
+Certain algorithms require more than one large integer. In these instances it is ideal to initialize all of the mp\_int
+variables in an ``all or nothing'' fashion. That is, they are either all initialized successfully or they are all
+not initialized.
+
+The mp\_init\_multi() function provides this functionality.
+
+\index{mp\_init\_multi} \index{mp\_clear\_multi}
+\begin{alltt}
+int mp_init_multi(mp_int *mp, ...);
+\end{alltt}
+
+It accepts a \textbf{NULL} terminated list of pointers to mp\_int structures. It will attempt to initialize them all
+at once. If the function returns MP\_OKAY then all of the mp\_int variables are ready to use, otherwise none of them
+are available for use. A complementary mp\_clear\_multi() function allows multiple mp\_int variables to be free'd
+from the heap at the same time.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int num1, num2, num3;
+ int result;
+
+ if ((result = mp_init_multi(&num1,
+ &num2,
+ &num3, NULL)) != MP\_OKAY) \{
+ printf("Error initializing the numbers. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* use the numbers */
+
+ /* We're done with them. */
+ mp_clear_multi(&num1, &num2, &num3, NULL);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+\subsection{Other Initializers}
+To initialize and make a copy of an mp\_int the mp\_init\_copy() function has been provided.
+
+\index{mp\_init\_copy}
+\begin{alltt}
+int mp_init_copy (mp_int * a, mp_int * b);
+\end{alltt}
+
+This function will initialize $a$ and make it a copy of $b$ if all goes well.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int num1, num2;
+ int result;
+
+ /* initialize and do work on num1 ... */
+
+ /* We want a copy of num1 in num2 now */
+ if ((result = mp_init_copy(&num2, &num1)) != MP_OKAY) \{
+ printf("Error initializing the copy. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* now num2 is ready and contains a copy of num1 */
+
+ /* We're done with them. */
+ mp_clear_multi(&num1, &num2, NULL);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+Another less common initializer is mp\_init\_size() which allows the user to initialize an mp\_int with a given
+default number of digits. By default, all initializers allocate \textbf{MP\_PREC} digits. This function lets
+you override this behaviour.
+
+\index{mp\_init\_size}
+\begin{alltt}
+int mp_init_size (mp_int * a, int size);
+\end{alltt}
+
+The $size$ parameter must be greater than zero. If the function succeeds the mp\_int $a$ will be initialized
+to have $size$ digits (which are all initially zero).
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ /* we need a 60-digit number */
+ if ((result = mp_init_size(&number, 60)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* use the number */
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+\section{Maintenance Functions}
+
+\subsection{Reducing Memory Usage}
+When an mp\_int is in a state where it won't be changed again\footnote{A Diffie-Hellman modulus for instance.} excess
+digits can be removed to return memory to the heap with the mp\_shrink() function.
+
+\index{mp\_shrink}
+\begin{alltt}
+int mp_shrink (mp_int * a);
+\end{alltt}
+
+This will remove excess digits of the mp\_int $a$. If the operation fails the mp\_int should be intact without the
+excess digits being removed. Note that you can use a shrunk mp\_int in further computations, however, such operations
+will require heap operations which can be slow. It is not ideal to shrink mp\_int variables that you will further
+modify in the system (unless you are seriously low on memory).
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ if ((result = mp_init(&number)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* use the number [e.g. pre-computation] */
+
+ /* We're done with it for now. */
+ if ((result = mp_shrink(&number)) != MP_OKAY) \{
+ printf("Error shrinking the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* use it .... */
+
+
+ /* we're done with it. */
+ mp_clear(&number);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+\subsection{Adding additional digits}
+
+Within the mp\_int structure are two parameters which control the limitations of the array of digits that represent
+the integer the mp\_int is meant to equal. The \textit{used} parameter dictates how many digits are significant, that is,
+contribute to the value of the mp\_int. The \textit{alloc} parameter dictates how many digits are currently available in
+the array. If you need to perform an operation that requires more digits you will have to mp\_grow() the mp\_int to
+your desired size.
+
+\index{mp\_grow}
+\begin{alltt}
+int mp_grow (mp_int * a, int size);
+\end{alltt}
+
+This will grow the array of digits of $a$ to $size$. If the \textit{alloc} parameter is already bigger than
+$size$ the function will not do anything.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ if ((result = mp_init(&number)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* use the number */
+
+ /* We need to add 20 digits to the number */
+ if ((result = mp_grow(&number, number.alloc + 20)) != MP_OKAY) \{
+ printf("Error growing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+
+ /* use the number */
+
+ /* we're done with it. */
+ mp_clear(&number);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+\chapter{Basic Operations}
+\section{Small Constants}
+Setting mp\_ints to small constants is a relatively common operation. To accommodate these instances there are two
+small constant assignment functions. The first function is used to set a single digit constant while the second sets
+an ISO C style ``unsigned long'' constant. The reason for both functions is efficiency. Setting a single digit is quick but the
+domain of a digit can change (it's always at least $0 \ldots 127$).
+
+\subsection{Single Digit}
+
+Setting a single digit can be accomplished with the following function.
+
+\index{mp\_set}
+\begin{alltt}
+void mp_set (mp_int * a, mp_digit b);
+\end{alltt}
+
+This will zero the contents of $a$ and make it represent an integer equal to the value of $b$. Note that this
+function has a return type of \textbf{void}. It cannot cause an error so it is safe to assume the function
+succeeded.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ if ((result = mp_init(&number)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* set the number to 5 */
+ mp_set(&number, 5);
+
+ /* we're done with it. */
+ mp_clear(&number);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+\subsection{Long Constants}
+
+To set a constant that is the size of an ISO C ``unsigned long'' and larger than a single digit the following function
+can be used.
+
+\index{mp\_set\_int}
+\begin{alltt}
+int mp_set_int (mp_int * a, unsigned long b);
+\end{alltt}
+
+This will assign the value of the 32-bit variable $b$ to the mp\_int $a$. Unlike mp\_set() this function will always
+accept a 32-bit input regardless of the size of a single digit. However, since the value may span several digits
+this function can fail if it runs out of heap memory.
+
+To get the ``unsigned long'' copy of an mp\_int the following function can be used.
+
+\index{mp\_get\_int}
+\begin{alltt}
+unsigned long mp_get_int (mp_int * a);
+\end{alltt}
+
+This will return the 32 least significant bits of the mp\_int $a$.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ if ((result = mp_init(&number)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* set the number to 654321 (note this is bigger than 127) */
+ if ((result = mp_set_int(&number, 654321)) != MP_OKAY) \{
+ printf("Error setting the value of the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ printf("number == \%lu", mp_get_int(&number));
+
+ /* we're done with it. */
+ mp_clear(&number);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+This should output the following if the program succeeds.
+
+\begin{alltt}
+number == 654321
+\end{alltt}
+
+\subsection{Initialize and Setting Constants}
+To both initialize and set small constants the following two functions are available.
+\index{mp\_init\_set} \index{mp\_init\_set\_int}
+\begin{alltt}
+int mp_init_set (mp_int * a, mp_digit b);
+int mp_init_set_int (mp_int * a, unsigned long b);
+\end{alltt}
+
+Both functions work like the previous counterparts except they first mp\_init $a$ before setting the values.
+
+\begin{alltt}
+int main(void)
+\{
+ mp_int number1, number2;
+ int result;
+
+ /* initialize and set a single digit */
+ if ((result = mp_init_set(&number1, 100)) != MP_OKAY) \{
+ printf("Error setting number1: \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* initialize and set a long */
+ if ((result = mp_init_set_int(&number2, 1023)) != MP_OKAY) \{
+ printf("Error setting number2: \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* display */
+ printf("Number1, Number2 == \%lu, \%lu",
+ mp_get_int(&number1), mp_get_int(&number2));
+
+ /* clear */
+ mp_clear_multi(&number1, &number2, NULL);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt}
+
+If this program succeeds it shall output.
+\begin{alltt}
+Number1, Number2 == 100, 1023
+\end{alltt}
+
+\section{Comparisons}
+
+Comparisons in LibTomMath are always performed in a ``left to right'' fashion. There are three possible return codes
+for any comparison.
+
+\index{MP\_GT} \index{MP\_EQ} \index{MP\_LT}
+\begin{figure}[here]
+\begin{center}
+\begin{tabular}{|c|c|}
+\hline \textbf{Result Code} & \textbf{Meaning} \\
+\hline MP\_GT & $a > b$ \\
+\hline MP\_EQ & $a = b$ \\
+\hline MP\_LT & $a < b$ \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Comparison Codes for $a, b$}
+\label{fig:CMP}
+\end{figure}
+
+In figure \ref{fig:CMP} two integers $a$ and $b$ are being compared. In this case $a$ is said to be ``to the left'' of
+$b$.
+
+\subsection{Unsigned comparison}
+
+An unsigned comparison considers only the digits themselves and not the associated \textit{sign} flag of the
+mp\_int structures. This is analogous to an absolute comparison. The function mp\_cmp\_mag() will compare two
+mp\_int variables based on their digits only.
+
+\index{mp\_cmp\_mag}
+\begin{alltt}
+int mp_cmp_mag(mp_int * a, mp_int * b);
+\end{alltt}
+This will compare $a$ to $b$ placing $a$ to the left of $b$. This function cannot fail and will return one of the
+three compare codes listed in figure \ref{fig:CMP}.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number1, number2;
+ int result;
+
+ if ((result = mp_init_multi(&number1, &number2, NULL)) != MP_OKAY) \{
+ printf("Error initializing the numbers. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* set the number1 to 5 */
+ mp_set(&number1, 5);
+
+ /* set the number2 to -6 */
+ mp_set(&number2, 6);
+ if ((result = mp_neg(&number2, &number2)) != MP_OKAY) \{
+ printf("Error negating number2. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ switch(mp_cmp_mag(&number1, &number2)) \{
+ case MP_GT: printf("|number1| > |number2|"); break;
+ case MP_EQ: printf("|number1| = |number2|"); break;
+ case MP_LT: printf("|number1| < |number2|"); break;
+ \}
+
+ /* we're done with it. */
+ mp_clear_multi(&number1, &number2, NULL);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+If this program\footnote{This program uses the mp\_neg() function which is discussed in section \ref{sec:NEG}.} completes
+successfully it should print the following.
+
+\begin{alltt}
+|number1| < |number2|
+\end{alltt}
+
+This is because $\vert -6 \vert = 6$ and obviously $5 < 6$.
+
+\subsection{Signed comparison}
+
+To compare two mp\_int variables based on their signed value the mp\_cmp() function is provided.
+
+\index{mp\_cmp}
+\begin{alltt}
+int mp_cmp(mp_int * a, mp_int * b);
+\end{alltt}
+
+This will compare $a$ to the left of $b$. It will first compare the signs of the two mp\_int variables. If they
+differ it will return immediately based on their signs. If the signs are equal then it will compare the digits
+individually. This function will return one of the compare condition codes listed in figure \ref{fig:CMP}.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number1, number2;
+ int result;
+
+ if ((result = mp_init_multi(&number1, &number2, NULL)) != MP_OKAY) \{
+ printf("Error initializing the numbers. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* set the number1 to 5 */
+ mp_set(&number1, 5);
+
+ /* set the number2 to -6 */
+ mp_set(&number2, 6);
+ if ((result = mp_neg(&number2, &number2)) != MP_OKAY) \{
+ printf("Error negating number2. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ switch(mp_cmp(&number1, &number2)) \{
+ case MP_GT: printf("number1 > number2"); break;
+ case MP_EQ: printf("number1 = number2"); break;
+ case MP_LT: printf("number1 < number2"); break;
+ \}
+
+ /* we're done with it. */
+ mp_clear_multi(&number1, &number2, NULL);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+If this program\footnote{This program uses the mp\_neg() function which is discussed in section \ref{sec:NEG}.} completes
+successfully it should print the following.
+
+\begin{alltt}
+number1 > number2
+\end{alltt}
+
+\subsection{Single Digit}
+
+To compare a single digit against an mp\_int the following function has been provided.
+
+\index{mp\_cmp\_d}
+\begin{alltt}
+int mp_cmp_d(mp_int * a, mp_digit b);
+\end{alltt}
+
+This will compare $a$ to the left of $b$ using a signed comparison. Note that it will always treat $b$ as
+positive. This function is rather handy when you have to compare against small values such as $1$ (which often
+comes up in cryptography). The function cannot fail and will return one of the three compare condition codes
+listed in figure \ref{fig:CMP}.
+
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ if ((result = mp_init(&number)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* set the number to 5 */
+ mp_set(&number, 5);
+
+ switch(mp_cmp_d(&number, 7)) \{
+ case MP_GT: printf("number > 7"); break;
+ case MP_EQ: printf("number = 7"); break;
+ case MP_LT: printf("number < 7"); break;
+ \}
+
+ /* we're done with it. */
+ mp_clear(&number);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+If this program functions properly it will print out the following.
+
+\begin{alltt}
+number < 7
+\end{alltt}
+
+\section{Logical Operations}
+
+Logical operations are operations that can be performed either with simple shifts or boolean operators such as
+AND, XOR and OR directly. These operations are very quick.
+
+\subsection{Multiplication by two}
+
+Multiplications and divisions by any power of two can be performed with quick logical shifts either left or
+right depending on the operation.
+
+When multiplying or dividing by two, special case routines can be used which are as follows.
+\index{mp\_mul\_2} \index{mp\_div\_2}
+\begin{alltt}
+int mp_mul_2(mp_int * a, mp_int * b);
+int mp_div_2(mp_int * a, mp_int * b);
+\end{alltt}
+
+The former will assign twice $a$ to $b$ while the latter will assign half $a$ to $b$. These functions are fast
+since the shift counts and masks are hardcoded into the routines.
+
+\begin{small} \begin{alltt}
+int main(void)
+\{
+ mp_int number;
+ int result;
+
+ if ((result = mp_init(&number)) != MP_OKAY) \{
+ printf("Error initializing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* set the number to 5 */
+ mp_set(&number, 5);
+
+ /* multiply by two */
+ if ((result = mp\_mul\_2(&number, &number)) != MP_OKAY) \{
+ printf("Error multiplying the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+ switch(mp_cmp_d(&number, 7)) \{
+ case MP_GT: printf("2*number > 7"); break;
+ case MP_EQ: printf("2*number = 7"); break;
+ case MP_LT: printf("2*number < 7"); break;
+ \}
+
+ /* now divide by two */
+ if ((result = mp\_div\_2(&number, &number)) != MP_OKAY) \{
+ printf("Error dividing the number. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+ switch(mp_cmp_d(&number, 7)) \{
+ case MP_GT: printf("2*number/2 > 7"); break;
+ case MP_EQ: printf("2*number/2 = 7"); break;
+ case MP_LT: printf("2*number/2 < 7"); break;
+ \}
+
+ /* we're done with it. */
+ mp_clear(&number);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt} \end{small}
+
+If this program is successful it will print out the following text.
+
+\begin{alltt}
+2*number > 7
+2*number/2 < 7
+\end{alltt}
+
+Since $10 > 7$ and $5 < 7$. To multiply by a power of two the following function can be used.
+
+\index{mp\_mul\_2d}
+\begin{alltt}
+int mp_mul_2d(mp_int * a, int b, mp_int * c);
+\end{alltt}
+
+This will multiply $a$ by $2^b$ and store the result in ``c''. If the value of $b$ is less than or equal to
+zero the function will copy $a$ to ``c'' without performing any further actions.
+
+To divide by a power of two use the following.
+
+\index{mp\_div\_2d}
+\begin{alltt}
+int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d);
+\end{alltt}
+Which will divide $a$ by $2^b$, store the quotient in ``c'' and the remainder in ``d''. If $b \le 0$ then the
+function simply copies $a$ over to ``c'' and zeroes $d$. The variable $d$ may be passed as a \textbf{NULL}
+value to signal that the remainder is not desired.
+
+\subsection{Polynomial Basis Operations}
+
+Strictly speaking the organization of the integers within the mp\_int structures is what is known as a
+``polynomial basis''. This simply means a field element is stored by divisions of a radix. For example, if
+$f(x) = \sum_{i=0}^{k} y_ix^i$ for any vector $\vec y$ then the array of digits in $\vec y$ are said to be
+the polynomial basis representation of $z$ if $f(\beta) = z$ for a given radix $\beta$.
+
+To multiply by the polynomial $g(x) = x$ all you have to do is shift the digits of the basis left one place. The
+following function provides this operation.
+
+\index{mp\_lshd}
+\begin{alltt}
+int mp_lshd (mp_int * a, int b);
+\end{alltt}
+
+This will multiply $a$ in place by $x^b$ which is equivalent to shifting the digits left $b$ places and inserting zeroes
+in the least significant digits. Similarly to divide by a power of $x$ the following function is provided.
+
+\index{mp\_rshd}
+\begin{alltt}
+void mp_rshd (mp_int * a, int b)
+\end{alltt}
+This will divide $a$ in place by $x^b$ and discard the remainder. This function cannot fail as it performs the operations
+in place and no new digits are required to complete it.
+
+\subsection{AND, OR and XOR Operations}
+
+While AND, OR and XOR operations are not typical ``bignum functions'' they can be useful in several instances. The
+three functions are prototyped as follows.
+
+\index{mp\_or} \index{mp\_and} \index{mp\_xor}
+\begin{alltt}
+int mp_or (mp_int * a, mp_int * b, mp_int * c);
+int mp_and (mp_int * a, mp_int * b, mp_int * c);
+int mp_xor (mp_int * a, mp_int * b, mp_int * c);
+\end{alltt}
+
+Which compute $c = a \odot b$ where $\odot$ is one of OR, AND or XOR.
+
+\section{Addition and Subtraction}
+
+To compute an addition or subtraction the following two functions can be used.
+
+\index{mp\_add} \index{mp\_sub}
+\begin{alltt}
+int mp_add (mp_int * a, mp_int * b, mp_int * c);
+int mp_sub (mp_int * a, mp_int * b, mp_int * c)
+\end{alltt}
+
+Which perform $c = a \odot b$ where $\odot$ is one of signed addition or subtraction. The operations are fully sign
+aware.
+
+\section{Sign Manipulation}
+\subsection{Negation}
+\label{sec:NEG}
+Simple integer negation can be performed with the following.
+
+\index{mp\_neg}
+\begin{alltt}
+int mp_neg (mp_int * a, mp_int * b);
+\end{alltt}
+
+Which assigns $-a$ to $b$.
+
+\subsection{Absolute}
+Simple integer absolutes can be performed with the following.
+
+\index{mp\_abs}
+\begin{alltt}
+int mp_abs (mp_int * a, mp_int * b);
+\end{alltt}
+
+Which assigns $\vert a \vert$ to $b$.
+
+\section{Integer Division and Remainder}
+To perform a complete and general integer division with remainder use the following function.
+
+\index{mp\_div}
+\begin{alltt}
+int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d);
+\end{alltt}
+
+This divides $a$ by $b$ and stores the quotient in $c$ and the remainder in $d$. The signed quotient is computed such that
+$bc + d = a$. Note that either of $c$ or $d$ can be set to \textbf{NULL} if their value is not required. If
+$b$ is zero the function returns \textbf{MP\_VAL}.
+
+
+\chapter{Multiplication and Squaring}
+\section{Multiplication}
+A full signed integer multiplication can be performed with the following.
+\index{mp\_mul}
+\begin{alltt}
+int mp_mul (mp_int * a, mp_int * b, mp_int * c);
+\end{alltt}
+Which assigns the full signed product $ab$ to $c$. This function actually breaks into one of four cases which are
+specific multiplication routines optimized for given parameters. First there are the Toom-Cook multiplications which
+should only be used with very large inputs. This is followed by the Karatsuba multiplications which are for moderate
+sized inputs. Then followed by the Comba and baseline multipliers.
+
+Fortunately for the developer you don't really need to know this unless you really want to fine tune the system. mp\_mul()
+will determine on its own\footnote{Some tweaking may be required.} what routine to use automatically when it is called.
+
+\begin{alltt}
+int main(void)
+\{
+ mp_int number1, number2;
+ int result;
+
+ /* Initialize the numbers */
+ if ((result = mp_init_multi(&number1,
+ &number2, NULL)) != MP_OKAY) \{
+ printf("Error initializing the numbers. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* set the terms */
+ if ((result = mp_set_int(&number1, 257)) != MP_OKAY) \{
+ printf("Error setting number1. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ if ((result = mp_set_int(&number2, 1023)) != MP_OKAY) \{
+ printf("Error setting number2. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* multiply them */
+ if ((result = mp_mul(&number1, &number2,
+ &number1)) != MP_OKAY) \{
+ printf("Error multiplying terms. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* display */
+ printf("number1 * number2 == \%lu", mp_get_int(&number1));
+
+ /* free terms and return */
+ mp_clear_multi(&number1, &number2, NULL);
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt}
+
+If this program succeeds it shall output the following.
+
+\begin{alltt}
+number1 * number2 == 262911
+\end{alltt}
+
+\section{Squaring}
+Since squaring can be performed faster than multiplication it is performed by its own function instead of just using
+mp\_mul().
+
+\index{mp\_sqr}
+\begin{alltt}
+int mp_sqr (mp_int * a, mp_int * b);
+\end{alltt}
+
+Will square $a$ and store it in $b$. Like the case of multiplication there are four different squaring
+algorithms all of which can be called from mp\_sqr(). It is ideal to use mp\_sqr over mp\_mul when squaring terms.
+
+\section{Tuning Polynomial Basis Routines}
+
+Both of the Toom-Cook and Karatsuba multiplication algorithms are faster than the traditional $O(n^2)$ approach that
+the Comba and baseline algorithms use. At $O(n^{1.464973})$ and $O(n^{1.584962})$ running times respectively they require
+considerably less work. For example, a 10000-digit multiplication would take roughly 724,000 single precision
+multiplications with Toom-Cook or 100,000,000 single precision multiplications with the standard Comba (a factor
+of 138).
+
+So why not always use Karatsuba or Toom-Cook? The simple answer is that they have so much overhead that they're not
+actually faster than Comba until you hit distinct ``cutoff'' points. For Karatsuba with the default configuration,
+GCC 3.3.1 and an Athlon XP processor the cutoff point is roughly 110 digits (about 70 for the Intel P4). That is, at
+110 digits Karatsuba and Comba multiplications just about break even and for 110+ digits Karatsuba is faster.
+
+Toom-Cook has incredible overhead and is probably only useful for very large inputs. So far no known cutoff points
+exist and for the most part I just set the cutoff points very high to make sure they're not called.
+
+A demo program in the ``etc/'' directory of the project called ``tune.c'' can be used to find the cutoff points. This
+can be built with GCC as follows
+
+\begin{alltt}
+make XXX
+\end{alltt}
+Where ``XXX'' is one of the following entries from the table \ref{fig:tuning}.
+
+\begin{figure}[here]
+\begin{center}
+\begin{small}
+\begin{tabular}{|l|l|}
+\hline \textbf{Value of XXX} & \textbf{Meaning} \\
+\hline tune & Builds portable tuning application \\
+\hline tune86 & Builds x86 (pentium and up) program for COFF \\
+\hline tune86c & Builds x86 program for Cygwin \\
+\hline tune86l & Builds x86 program for Linux (ELF format) \\
+\hline
+\end{tabular}
+\end{small}
+\end{center}
+\caption{Build Names for Tuning Programs}
+\label{fig:tuning}
+\end{figure}
+
+When the program is running it will output a series of measurements for different cutoff points. It will first find
+good Karatsuba squaring and multiplication points. Then it proceeds to find Toom-Cook points. Note that the Toom-Cook
+tuning takes a very long time as the cutoff points are likely to be very high.
+
+\chapter{Modular Reduction}
+
+Modular reduction is the process of taking the remainder of one quantity divided by another. Expressed
+as (\ref{eqn:mod}) the modular reduction is equivalent to the remainder of $b$ divided by $c$.
+
+\begin{equation}
+a \equiv b \mbox{ (mod }c\mbox{)}
+\label{eqn:mod}
+\end{equation}
+
+Of particular interest to cryptography are reductions where $b$ is limited to the range $0 \le b < c^2$ since particularly
+fast reduction algorithms can be written for the limited range.
+
+Note that one of the four optimized reduction algorithms is automatically chosen in the modular exponentiation
+algorithm mp\_exptmod when an appropriate modulus is detected.
+
+\section{Straight Division}
+In order to effect an arbitrary modular reduction the following algorithm is provided.
+
+\index{mp\_mod}
+\begin{alltt}
+int mp_mod(mp_int *a, mp_int *b, mp_int *c);
+\end{alltt}
+
+This reduces $a$ modulo $b$ and stores the result in $c$. The sign of $c$ shall agree with the sign
+of $b$. This algorithm accepts an input $a$ of any range and is not limited by $0 \le a < b^2$.
+
+\section{Barrett Reduction}
+
+Barrett reduction is a generic optimized reduction algorithm that requires pre--computation to achieve
+a decent speedup over straight division. First a $mu$ value must be precomputed with the following function.
+
+\index{mp\_reduce\_setup}
+\begin{alltt}
+int mp_reduce_setup(mp_int *a, mp_int *b);
+\end{alltt}
+
+Given a modulus in $b$ this produces the required $mu$ value in $a$. For any given modulus this only has to
+be computed once. Modular reduction can now be performed with the following.
+
+\index{mp\_reduce}
+\begin{alltt}
+int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
+\end{alltt}
+
+This will reduce $a$ in place modulo $b$ with the precomputed $mu$ value in $c$. $a$ must be in the range
+$0 \le a < b^2$.
+
+\begin{alltt}
+int main(void)
+\{
+ mp_int a, b, c, mu;
+ int result;
+
+ /* initialize a,b to desired values, mp_init mu,
+ * c and set c to 1...we want to compute a^3 mod b
+ */
+
+ /* get mu value */
+ if ((result = mp_reduce_setup(&mu, &b)) != MP_OKAY) \{
+ printf("Error getting mu. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* square a to get c = a^2 */
+ if ((result = mp_sqr(&a, &c)) != MP_OKAY) \{
+ printf("Error squaring. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* now reduce `c' modulo b */
+ if ((result = mp_reduce(&c, &b, &mu)) != MP_OKAY) \{
+ printf("Error reducing. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* multiply a to get c = a^3 */
+ if ((result = mp_mul(&a, &c, &c)) != MP_OKAY) \{
+ printf("Error multiplying. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* now reduce `c' modulo b */
+ if ((result = mp_reduce(&c, &b, &mu)) != MP_OKAY) \{
+ printf("Error reducing. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* c now equals a^3 mod b */
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt}
+
+This program will calculate $a^3 \mbox{ mod }b$ if all the functions succeed.
+
+\section{Montgomery Reduction}
+
+Montgomery is a specialized reduction algorithm for any odd moduli. Like Barrett reduction a pre--computation
+step is required. This is accomplished with the following.
+
+\index{mp\_montgomery\_setup}
+\begin{alltt}
+int mp_montgomery_setup(mp_int *a, mp_digit *mp);
+\end{alltt}
+
+For the given odd modulus $a$ the precomputation value is placed in $mp$. The reduction is computed with the
+following.
+
+\index{mp\_montgomery\_reduce}
+\begin{alltt}
+int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
+\end{alltt}
+This reduces $a$ in place modulo $m$ with the pre--computed value $mp$. $a$ must be in the range
+$0 \le a < m^2$.
+
+Montgomery reduction is faster than Barrett reduction for moduli smaller than the ``comba'' limit. With the default
+setup for instance, the limit is $127$ digits ($3556$--bits). Note that this function is not limited to
+$127$ digits just that it falls back to a baseline algorithm after that point.
+
+An important observation is that this reduction does not return $a \mbox{ mod }m$ but $aR^{-1} \mbox{ mod }m$
+where $R = \beta^n$, $n$ is the number of digits in $m$ and $\beta$ is the radix used (default is $2^{28}$).
+
+To quickly calculate $R$ the following function was provided.
+
+\index{mp\_montgomery\_calc\_normalization}
+\begin{alltt}
+int mp_montgomery_calc_normalization(mp_int *a, mp_int *b);
+\end{alltt}
+Which calculates $a = R$ for the odd moduli $b$ without using multiplication or division.
+
+The normal modus operandi for Montgomery reductions is to normalize the integers before entering the system. For
+example, to calculate $a^3 \mbox { mod }b$ using Montgomery reduction the value of $a$ can be normalized by
+multiplying it by $R$. Consider the following code snippet.
+
+\begin{alltt}
+int main(void)
+\{
+ mp_int a, b, c, R;
+ mp_digit mp;
+ int result;
+
+ /* initialize a,b to desired values,
+ * mp_init R, c and set c to 1....
+ */
+
+ /* get normalization */
+ if ((result = mp_montgomery_calc_normalization(&R, &b)) != MP_OKAY) \{
+ printf("Error getting norm. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* get mp value */
+ if ((result = mp_montgomery_setup(&b, &mp)) != MP_OKAY) \{
+ printf("Error setting up montgomery. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* normalize `a' so now a is equal to aR */
+ if ((result = mp_mulmod(&a, &R, &b, &a)) != MP_OKAY) \{
+ printf("Error computing aR. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* square a to get c = a^2R^2 */
+ if ((result = mp_sqr(&a, &c)) != MP_OKAY) \{
+ printf("Error squaring. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* now reduce `c' back down to c = a^2R^2 * R^-1 == a^2R */
+ if ((result = mp_montgomery_reduce(&c, &b, mp)) != MP_OKAY) \{
+ printf("Error reducing. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* multiply a to get c = a^3R^2 */
+ if ((result = mp_mul(&a, &c, &c)) != MP_OKAY) \{
+ printf("Error multiplying. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* now reduce `c' back down to c = a^3R^2 * R^-1 == a^3R */
+ if ((result = mp_montgomery_reduce(&c, &b, mp)) != MP_OKAY) \{
+ printf("Error reducing. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* now reduce (again) `c' back down to c = a^3R * R^-1 == a^3 */
+ if ((result = mp_montgomery_reduce(&c, &b, mp)) != MP_OKAY) \{
+ printf("Error reducing. \%s",
+ mp_error_to_string(result));
+ return EXIT_FAILURE;
+ \}
+
+ /* c now equals a^3 mod b */
+
+ return EXIT_SUCCESS;
+\}
+\end{alltt}
+
+This particular example does not look too efficient but it demonstrates the point of the algorithm. By
+normalizing the inputs the reduced results are always of the form $aR$ for some variable $a$. This allows
+a single final reduction to correct for the normalization and the fast reduction used within the algorithm.
+
+For more details consider examining the file \textit{bn\_mp\_exptmod\_fast.c}.
+
+\section{Restricted Diminished Radix}
+
+``Diminished Radix'' reduction refers to reduction with respect to moduli that are amenable to simple
+digit shifting and small multiplications. In this case the ``restricted'' variant refers to moduli of the
+form $\beta^k - p$ for some $k \ge 0$ and $0 < p < \beta$ where $\beta$ is the radix (default to $2^{28}$).
+
+As in the case of Montgomery reduction there is a pre--computation phase required for a given modulus.
+
+\index{mp\_dr\_setup}
+\begin{alltt}
+void mp_dr_setup(mp_int *a, mp_digit *d);
+\end{alltt}
+
+This computes the value required for the modulus $a$ and stores it in $d$. This function cannot fail
+and does not return any error codes. After the pre--computation a reduction can be performed with the
+following.
+
+\index{mp\_dr\_reduce}
+\begin{alltt}
+int mp_dr_reduce(mp_int *a, mp_int *b, mp_digit mp);
+\end{alltt}
+
+This reduces $a$ in place modulo $b$ with the pre--computed value $mp$. $b$ must be of a restricted
+diminished radix form and $a$ must be in the range $0 \le a < b^2$. Diminished radix reductions are
+much faster than both Barrett and Montgomery reductions as they have a much lower asymptotic running time.
+
+Since the moduli are restricted this algorithm is not particularly useful for something like Rabin, RSA or
+BBS cryptographic purposes. This reduction algorithm is useful for Diffie-Hellman and ECC where fixed
+primes are acceptable.
+
+Note that unlike Montgomery reduction there is no normalization process. The result of this function is
+equal to the correct residue.
+
+\section{Unrestricted Diminished Radix}
+
+Unrestricted reductions work much like the restricted counterparts except in this case the modulus is of the
+form $2^k - p$ for $0 < p < \beta$. In this sense the unrestricted reductions are more flexible as they
+can be applied to a wider range of numbers.
+
+\index{mp\_reduce\_2k\_setup}
+\begin{alltt}
+int mp_reduce_2k_setup(mp_int *a, mp_digit *d);
+\end{alltt}
+
+This will compute the required $d$ value for the given modulus $a$.
+
+\index{mp\_reduce\_2k}
+\begin{alltt}
+int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d);
+\end{alltt}
+
+This will reduce $a$ in place modulo $n$ with the pre--computed value $d$. From my experience this routine is
+slower than mp\_dr\_reduce but faster for most moduli sizes than the Montgomery reduction.
+
+\chapter{Exponentiation}
+\section{Single Digit Exponentiation}
+\index{mp\_expt\_d}
+\begin{alltt}
+int mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
+\end{alltt}
+This computes $c = a^b$ using a simple binary left-to-right algorithm. It is faster than repeated multiplications by
+$a$ for all values of $b$ greater than three.
+
+\section{Modular Exponentiation}
+\index{mp\_exptmod}
+\begin{alltt}
+int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+\end{alltt}
+This computes $Y \equiv G^X \mbox{ (mod }P\mbox{)}$ using a variable width sliding window algorithm. This function
+will automatically detect the fastest modular reduction technique to use during the operation. For negative values of
+$X$ the operation is performed as $Y \equiv (G^{-1} \mbox{ mod }P)^{\vert X \vert} \mbox{ (mod }P\mbox{)}$ provided that
+$gcd(G, P) = 1$.
+
+This function is actually a shell around the two internal exponentiation functions. This routine will automatically
+detect when Barrett, Montgomery, Restricted and Unrestricted Diminished Radix based exponentiation can be used. Generally
+moduli of a ``restricted diminished radix'' form lead to the fastest modular exponentiations. Followed by Montgomery
+and the other two algorithms.
+
+\section{Root Finding}
+\index{mp\_n\_root}
+\begin{alltt}
+int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
+\end{alltt}
+This computes $c = a^{1/b}$ such that $c^b \le a$ and $(c+1)^b > a$. The implementation of this function is not
+ideal for values of $b$ greater than three. It will work but become very slow. So unless you are working with very small
+numbers (less than 1000 bits) I'd avoid $b > 3$ situations. Will return a positive root only for even roots and return
+a root with the sign of the input for odd roots. For example, performing $4^{1/2}$ will return $2$ whereas $(-8)^{1/3}$
+will return $-2$.
+
+This algorithm uses the ``Newton Approximation'' method and will converge on the correct root fairly quickly. Since
+the algorithm requires raising $a$ to the power of $b$ it is not ideal to attempt to find roots for large
+values of $b$. If particularly large roots are required then a factor method could be used instead. For example,
+$a^{1/16}$ is equivalent to $\left (a^{1/4} \right)^{1/4}$.
+
+\chapter{Prime Numbers}
+\section{Trial Division}
+\index{mp\_prime\_is\_divisible}
+\begin{alltt}
+int mp_prime_is_divisible (mp_int * a, int *result)
+\end{alltt}
+This will attempt to evenly divide $a$ by a list of primes\footnote{Default is the first 256 primes.} and store the
+outcome in ``result''. That is if $result = 0$ then $a$ is not divisible by the primes, otherwise it is. Note that
+if the function does not return \textbf{MP\_OKAY} the value in ``result'' should be considered undefined\footnote{Currently
+the default is to set it to zero first.}.
+
+\section{Fermat Test}
+\index{mp\_prime\_fermat}
+\begin{alltt}
+int mp_prime_fermat (mp_int * a, mp_int * b, int *result)
+\end{alltt}
+Performs a Fermat primality test to the base $b$. That is it computes $b^a \mbox{ mod }a$ and tests whether the value is
+equal to $b$ or not. If the values are equal then $a$ is probably prime and $result$ is set to one. Otherwise $result$
+is set to zero.
+
+\section{Miller-Rabin Test}
+\index{mp\_prime\_miller\_rabin}
+\begin{alltt}
+int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
+\end{alltt}
+Performs a Miller-Rabin test to the base $b$ of $a$. This test is much stronger than the Fermat test and is very hard to
+fool (besides with Carmichael numbers). If $a$ passes the test (therefore is probably prime) $result$ is set to one.
+Otherwise $result$ is set to zero.
+
+Note that it is suggested that you use the Miller-Rabin test instead of the Fermat test since all of the failures of
+Miller-Rabin are a subset of the failures of the Fermat test.
+
+\subsection{Required Number of Tests}
+Generally to ensure a number is very likely to be prime you have to perform the Miller-Rabin with at least a half-dozen
+or so unique bases. However, it has been proven that the probability of failure goes down as the size of the input goes up.
+This is why a simple function has been provided to help out.
+
+\index{mp\_prime\_rabin\_miller\_trials}
+\begin{alltt}
+int mp_prime_rabin_miller_trials(int size)
+\end{alltt}
+This returns the number of trials required for a $2^{-96}$ (or lower) probability of failure for a given ``size'' expressed
+in bits. This comes in handy especially since larger numbers are slower to test. For example, a 512-bit number would
+require ten tests whereas a 1024-bit number would only require four tests.
+
+You should always still perform a trial division before a Miller-Rabin test though.
+
+\section{Primality Testing}
+\index{mp\_prime\_is\_prime}
+\begin{alltt}
+int mp_prime_is_prime (mp_int * a, int t, int *result)
+\end{alltt}
+This will perform a trial division followed by $t$ rounds of Miller-Rabin tests on $a$ and store the result in $result$.
+If $a$ passes all of the tests $result$ is set to one, otherwise it is set to zero. Note that $t$ is bounded by
+$1 \le t < PRIME\_SIZE$ where $PRIME\_SIZE$ is the number of primes in the prime number table (by default this is $256$).
+
+\section{Next Prime}
+\index{mp\_prime\_next\_prime}
+\begin{alltt}
+int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
+\end{alltt}
+This finds the next prime after $a$ that passes mp\_prime\_is\_prime() with $t$ tests. Set $bbs\_style$ to one if you
+want only the next prime congruent to $3 \mbox{ mod } 4$, otherwise set it to zero to find any next prime.
+
+\section{Random Primes}
+\index{mp\_prime\_random}
+\begin{alltt}
+int mp_prime_random(mp_int *a, int t, int size, int bbs,
+ ltm_prime_callback cb, void *dat)
+\end{alltt}
+This will find a prime greater than $256^{size}$ which can be ``bbs\_style'' or not depending on $bbs$ and must pass
+$t$ rounds of tests. The ``ltm\_prime\_callback'' is a typedef for
+
+\begin{alltt}
+typedef int ltm_prime_callback(unsigned char *dst, int len, void *dat);
+\end{alltt}
+
+Which is a function that must read $len$ bytes (and return the amount stored) into $dst$. The $dat$ variable is simply
+copied from the original input. It can be used to pass RNG context data to the callback. The function
+mp\_prime\_random() is more suitable for generating primes which must be secret (as in the case of RSA) since there
+is no skew on the least significant bits.
+
+\textit{Note:} As of v0.30 of the LibTomMath library this function has been deprecated. It is still available
+but users are encouraged to use the new mp\_prime\_random\_ex() function instead.
+
+\subsection{Extended Generation}
+\index{mp\_prime\_random\_ex}
+\begin{alltt}
+int mp_prime_random_ex(mp_int *a, int t,
+ int size, int flags,
+ ltm_prime_callback cb, void *dat);
+\end{alltt}
+This will generate a prime in $a$ using $t$ tests of the primality testing algorithms. The variable $size$
+specifies the bit length of the prime desired. The variable $flags$ specifies one of several options available
+(see fig. \ref{fig:primeopts}) which can be OR'ed together. The callback parameters are used as in
+mp\_prime\_random().
+
+\begin{figure}[here]
+\begin{center}
+\begin{small}
+\begin{tabular}{|r|l|}
+\hline \textbf{Flag} & \textbf{Meaning} \\
+\hline LTM\_PRIME\_BBS & Make the prime congruent to $3$ modulo $4$ \\
+\hline LTM\_PRIME\_SAFE & Make a prime $p$ such that $(p - 1)/2$ is also prime. \\
+ & This option implies LTM\_PRIME\_BBS as well. \\
+\hline LTM\_PRIME\_2MSB\_OFF & Makes sure that the bit adjacent to the most significant bit \\
+ & Is forced to zero. \\
+\hline LTM\_PRIME\_2MSB\_ON & Makes sure that the bit adjacent to the most significant bit \\
+ & Is forced to one. \\
+\hline
+\end{tabular}
+\end{small}
+\end{center}
+\caption{Primality Generation Options}
+\label{fig:primeopts}
+\end{figure}
+
+\chapter{Input and Output}
+\section{ASCII Conversions}
+\subsection{To ASCII}
+\index{mp\_toradix}
+\begin{alltt}
+int mp_toradix (mp_int * a, char *str, int radix);
+\end{alltt}
+This will store $a$ in ``str'' as a base-``radix'' string of ASCII chars. This function appends a NUL character
+to terminate the string. Valid values of ``radix'' lie in the range $[2, 64]$. To determine the size (exact) required
+by the conversion before storing any data use the following function.
+
+\index{mp\_radix\_size}
+\begin{alltt}
+int mp_radix_size (mp_int * a, int radix, int *size)
+\end{alltt}
+This stores in ``size'' the number of characters (including space for the NUL terminator) required. Upon error this
+function returns an error code and ``size'' will be zero.
+
+\subsection{From ASCII}
+\index{mp\_read\_radix}
+\begin{alltt}
+int mp_read_radix (mp_int * a, char *str, int radix);
+\end{alltt}
+This will read the base-``radix'' NUL terminated string from ``str'' into $a$. It will stop reading when it reads a
+character it does not recognize (which happens to include the NUL char... imagine that...). A single leading $-$ sign
+can be used to denote a negative number.
+
+\section{Binary Conversions}
+
+Converting an mp\_int to and from binary is another keen idea.
+
+\index{mp\_unsigned\_bin\_size}
+\begin{alltt}
+int mp_unsigned_bin_size(mp_int *a);
+\end{alltt}
+
+This will return the number of bytes (octets) required to store the unsigned copy of the integer $a$.
+
+\index{mp\_to\_unsigned\_bin}
+\begin{alltt}
+int mp_to_unsigned_bin(mp_int *a, unsigned char *b);
+\end{alltt}
+This will store $a$ into the buffer $b$ in big--endian format. Fortunately this is exactly what DER (or is it ASN?)
+requires. It does not store the sign of the integer.
+
+\index{mp\_read\_unsigned\_bin}
+\begin{alltt}
+int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c);
+\end{alltt}
+This will read in an unsigned big--endian array of bytes (octets) from $b$ of length $c$ into $a$. The resulting
+integer $a$ will always be positive.
+
+For those who acknowledge the existence of negative numbers (heretic!) there are ``signed'' versions of the
+previous functions.
+
+\begin{alltt}
+int mp_signed_bin_size(mp_int *a);
+int mp_read_signed_bin(mp_int *a, unsigned char *b, int c);
+int mp_to_signed_bin(mp_int *a, unsigned char *b);
+\end{alltt}
+They operate essentially the same as the unsigned copies except they prefix the data with zero or non--zero
+byte depending on the sign. If the sign is zpos (i.e. not negative) the prefix is zero, otherwise the prefix
+is non--zero.
+
+\chapter{Algebraic Functions}
+\section{Extended Euclidean Algorithm}
+\index{mp\_exteuclid}
+\begin{alltt}
+int mp_exteuclid(mp_int *a, mp_int *b,
+ mp_int *U1, mp_int *U2, mp_int *U3);
+\end{alltt}
+
+This finds the triple U1/U2/U3 using the Extended Euclidean algorithm such that the following equation holds.
+
+\begin{equation}
+a \cdot U1 + b \cdot U2 = U3
+\end{equation}
+
+Any of the U1/U2/U3 parameters can be set to \textbf{NULL} if they are not desired.
+
+\section{Greatest Common Divisor}
+\index{mp\_gcd}
+\begin{alltt}
+int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
+\end{alltt}
+This will compute the greatest common divisor of $a$ and $b$ and store it in $c$.
+
+\section{Least Common Multiple}
+\index{mp\_lcm}
+\begin{alltt}
+int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
+\end{alltt}
+This will compute the least common multiple of $a$ and $b$ and store it in $c$.
+
+\section{Jacobi Symbol}
+\index{mp\_jacobi}
+\begin{alltt}
+int mp_jacobi (mp_int * a, mp_int * p, int *c)
+\end{alltt}
+This will compute the Jacobi symbol for $a$ with respect to $p$. If $p$ is prime this essentially computes the Legendre
+symbol. The result is stored in $c$ and can take on one of three values $\lbrace -1, 0, 1 \rbrace$. If $p$ is prime
+then the result will be $-1$ when $a$ is not a quadratic residue modulo $p$. The result will be $0$ if $p$ divides $a$
+and the result will be $1$ if $a$ is a quadratic residue modulo $p$.
+
+\section{Modular Inverse}
+\index{mp\_invmod}
+\begin{alltt}
+int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+\end{alltt}
+Computes the multiplicative inverse of $a$ modulo $b$ and stores the result in $c$ such that $ac \equiv 1 \mbox{ (mod }b\mbox{)}$.
+
+\section{Single Digit Functions}
+
+For those using small numbers (\textit{snicker snicker}) there are several ``helper'' functions
+
+\index{mp\_add\_d} \index{mp\_sub\_d} \index{mp\_mul\_d} \index{mp\_div\_d} \index{mp\_mod\_d}
+\begin{alltt}
+int mp_add_d(mp_int *a, mp_digit b, mp_int *c);
+int mp_sub_d(mp_int *a, mp_digit b, mp_int *c);
+int mp_mul_d(mp_int *a, mp_digit b, mp_int *c);
+int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d);
+int mp_mod_d(mp_int *a, mp_digit b, mp_digit *c);
+\end{alltt}
+
+These work like the full mp\_int capable variants except the second parameter $b$ is a mp\_digit. These
+functions are fairly handy if you have to work with relatively small numbers since you will not have to allocate
+an entire mp\_int to store a number like $1$ or $2$.
+
+\input{bn.ind}
+
+\end{document}
============================================================
--- poster.out da39a3ee5e6b4b0d3255bfef95601890afd80709
+++ poster.out da39a3ee5e6b4b0d3255bfef95601890afd80709
============================================================
# poster.pdf is binary
============================================================
--- poster.tex 1fe3768a9d3d68873cd1b468f34e6b0474ac29dc
+++ poster.tex 1fe3768a9d3d68873cd1b468f34e6b0474ac29dc
@@ -0,0 +1,35 @@
+\documentclass[landscape,11pt]{article}
+\usepackage{amsmath, amssymb}
+\usepackage{hyperref}
+\begin{document}
+\hspace*{-3in}
+\begin{tabular}{llllll}
+$c = a + b$ & {\tt mp\_add(\&a, \&b, \&c)} & $b = 2a$ & {\tt mp\_mul\_2(\&a, \&b)} & \\
+$c = a - b$ & {\tt mp\_sub(\&a, \&b, \&c)} & $b = a/2$ & {\tt mp\_div\_2(\&a, \&b)} & \\
+$c = ab $ & {\tt mp\_mul(\&a, \&b, \&c)} & $c = 2^ba$ & {\tt mp\_mul\_2d(\&a, b, \&c)} \\
+$b = a^2 $ & {\tt mp\_sqr(\&a, \&b)} & $c = a/2^b, d = a \mod 2^b$ & {\tt mp\_div\_2d(\&a, b, \&c, \&d)} \\
+$c = \lfloor a/b \rfloor, d = a \mod b$ & {\tt mp\_div(\&a, \&b, \&c, \&d)} & $c = a \mod 2^b $ & {\tt mp\_mod\_2d(\&a, b, \&c)} \\
+ && \\
+$a = b $ & {\tt mp\_set\_int(\&a, b)} & $c = a \vee b$ & {\tt mp\_or(\&a, \&b, \&c)} \\
+$b = a $ & {\tt mp\_copy(\&a, \&b)} & $c = a \wedge b$ & {\tt mp\_and(\&a, \&b, \&c)} \\
+ && $c = a \oplus b$ & {\tt mp\_xor(\&a, \&b, \&c)} \\
+ & \\
+$b = -a $ & {\tt mp\_neg(\&a, \&b)} & $d = a + b \mod c$ & {\tt mp\_addmod(\&a, \&b, \&c, \&d)} \\
+$b = |a| $ & {\tt mp\_abs(\&a, \&b)} & $d = a - b \mod c$ & {\tt mp\_submod(\&a, \&b, \&c, \&d)} \\
+ && $d = ab \mod c$ & {\tt mp\_mulmod(\&a, \&b, \&c, \&d)} \\
+Compare $a$ and $b$ & {\tt mp\_cmp(\&a, \&b)} & $c = a^2 \mod b$ & {\tt mp\_sqrmod(\&a, \&b, \&c)} \\
+Is Zero? & {\tt mp\_iszero(\&a)} & $c = a^{-1} \mod b$ & {\tt mp\_invmod(\&a, \&b, \&c)} \\
+Is Even? & {\tt mp\_iseven(\&a)} & $d = a^b \mod c$ & {\tt mp\_exptmod(\&a, \&b, \&c, \&d)} \\
+Is Odd ? & {\tt mp\_isodd(\&a)} \\
+&\\
+$\vert \vert a \vert \vert$ & {\tt mp\_unsigned\_bin\_size(\&a)} & $res$ = 1 if $a$ prime to $t$ rounds? & {\tt mp\_prime\_is\_prime(\&a, t, \&res)} \\
+$buf \leftarrow a$ & {\tt mp\_to\_unsigned\_bin(\&a, buf)} & Next prime after $a$ to $t$ rounds. & {\tt mp\_prime\_next\_prime(\&a, t, bbs\_style)} \\
+$a \leftarrow buf[0..len-1]$ & {\tt mp\_read\_unsigned\_bin(\&a, buf, len)} \\
+&\\
+$b = \sqrt{a}$ & {\tt mp\_sqrt(\&a, \&b)} & $c = \mbox{gcd}(a, b)$ & {\tt mp\_gcd(\&a, \&b, \&c)} \\
+$c = a^{1/b}$ & {\tt mp\_n\_root(\&a, b, \&c)} & $c = \mbox{lcm}(a, b)$ & {\tt mp\_lcm(\&a, \&b, \&c)} \\
+&\\
+Greater Than & MP\_GT & Equal To & MP\_EQ \\
+Less Than & MP\_LT & Bits per digit & DIGIT\_BIT \\
+\end{tabular}
+\end{document}
============================================================
--- tommath.out 8b1ec2f5e8f380908d72001636ea44581d64bf76
+++ tommath.out 8b1ec2f5e8f380908d72001636ea44581d64bf76
@@ -0,0 +1,139 @@
+\BOOKMARK [0][-]{chapter.1}{Introduction}{}
+\BOOKMARK [1][-]{section.1.1}{Multiple Precision Arithmetic}{chapter.1}
+\BOOKMARK [2][-]{subsection.1.1.1}{What is Multiple Precision Arithmetic?}{section.1.1}
+\BOOKMARK [2][-]{subsection.1.1.2}{The Need for Multiple Precision Arithmetic}{section.1.1}
+\BOOKMARK [2][-]{subsection.1.1.3}{Benefits of Multiple Precision Arithmetic}{section.1.1}
+\BOOKMARK [1][-]{section.1.2}{Purpose of This Text}{chapter.1}
+\BOOKMARK [1][-]{section.1.3}{Discussion and Notation}{chapter.1}
+\BOOKMARK [2][-]{subsection.1.3.1}{Notation}{section.1.3}
+\BOOKMARK [2][-]{subsection.1.3.2}{Precision Notation}{section.1.3}
+\BOOKMARK [2][-]{subsection.1.3.3}{Algorithm Inputs and Outputs}{section.1.3}
+\BOOKMARK [2][-]{subsection.1.3.4}{Mathematical Expressions}{section.1.3}
+\BOOKMARK [2][-]{subsection.1.3.5}{Work Effort}{section.1.3}
+\BOOKMARK [1][-]{section.1.4}{Exercises}{chapter.1}
+\BOOKMARK [1][-]{section.1.5}{Introduction to LibTomMath}{chapter.1}
+\BOOKMARK [2][-]{subsection.1.5.1}{What is LibTomMath?}{section.1.5}
+\BOOKMARK [2][-]{subsection.1.5.2}{Goals of LibTomMath}{section.1.5}
+\BOOKMARK [1][-]{section.1.6}{Choice of LibTomMath}{chapter.1}
+\BOOKMARK [2][-]{subsection.1.6.1}{Code Base}{section.1.6}
+\BOOKMARK [2][-]{subsection.1.6.2}{API Simplicity}{section.1.6}
+\BOOKMARK [2][-]{subsection.1.6.3}{Optimizations}{section.1.6}
+\BOOKMARK [2][-]{subsection.1.6.4}{Portability and Stability}{section.1.6}
+\BOOKMARK [2][-]{subsection.1.6.5}{Choice}{section.1.6}
+\BOOKMARK [0][-]{chapter.2}{Getting Started}{}
+\BOOKMARK [1][-]{section.2.1}{Library Basics}{chapter.2}
+\BOOKMARK [1][-]{section.2.2}{What is a Multiple Precision Integer?}{chapter.2}
+\BOOKMARK [2][-]{subsection.2.2.1}{The mp\137int Structure}{section.2.2}
+\BOOKMARK [1][-]{section.2.3}{Argument Passing}{chapter.2}
+\BOOKMARK [1][-]{section.2.4}{Return Values}{chapter.2}
+\BOOKMARK [1][-]{section.2.5}{Initialization and Clearing}{chapter.2}
+\BOOKMARK [2][-]{subsection.2.5.1}{Initializing an mp\137int}{section.2.5}
+\BOOKMARK [2][-]{subsection.2.5.2}{Clearing an mp\137int}{section.2.5}
+\BOOKMARK [1][-]{section.2.6}{Maintenance Algorithms}{chapter.2}
+\BOOKMARK [2][-]{subsection.2.6.1}{Augmenting an mp\137int's Precision}{section.2.6}
+\BOOKMARK [2][-]{subsection.2.6.2}{Initializing Variable Precision mp\137ints}{section.2.6}
+\BOOKMARK [2][-]{subsection.2.6.3}{Multiple Integer Initializations and Clearings}{section.2.6}
+\BOOKMARK [2][-]{subsection.2.6.4}{Clamping Excess Digits}{section.2.6}
+\BOOKMARK [0][-]{chapter.3}{Basic Operations}{}
+\BOOKMARK [1][-]{section.3.1}{Introduction}{chapter.3}
+\BOOKMARK [1][-]{section.3.2}{Assigning Values to mp\137int Structures}{chapter.3}
+\BOOKMARK [2][-]{subsection.3.2.1}{Copying an mp\137int}{section.3.2}
+\BOOKMARK [2][-]{subsection.3.2.2}{Creating a Clone}{section.3.2}
+\BOOKMARK [1][-]{section.3.3}{Zeroing an Integer}{chapter.3}
+\BOOKMARK [1][-]{section.3.4}{Sign Manipulation}{chapter.3}
+\BOOKMARK [2][-]{subsection.3.4.1}{Absolute Value}{section.3.4}
+\BOOKMARK [2][-]{subsection.3.4.2}{Integer Negation}{section.3.4}
+\BOOKMARK [1][-]{section.3.5}{Small Constants}{chapter.3}
+\BOOKMARK [2][-]{subsection.3.5.1}{Setting Small Constants}{section.3.5}
+\BOOKMARK [2][-]{subsection.3.5.2}{Setting Large Constants}{section.3.5}
+\BOOKMARK [1][-]{section.3.6}{Comparisons}{chapter.3}
+\BOOKMARK [2][-]{subsection.3.6.1}{Unsigned Comparisions}{section.3.6}
+\BOOKMARK [2][-]{subsection.3.6.2}{Signed Comparisons}{section.3.6}
+\BOOKMARK [0][-]{chapter.4}{Basic Arithmetic}{}
+\BOOKMARK [1][-]{section.4.1}{Introduction}{chapter.4}
+\BOOKMARK [1][-]{section.4.2}{Addition and Subtraction}{chapter.4}
+\BOOKMARK [2][-]{subsection.4.2.1}{Low Level Addition}{section.4.2}
+\BOOKMARK [2][-]{subsection.4.2.2}{Low Level Subtraction}{section.4.2}
+\BOOKMARK [2][-]{subsection.4.2.3}{High Level Addition}{section.4.2}
+\BOOKMARK [2][-]{subsection.4.2.4}{High Level Subtraction}{section.4.2}
+\BOOKMARK [1][-]{section.4.3}{Bit and Digit Shifting}{chapter.4}
+\BOOKMARK [2][-]{subsection.4.3.1}{Multiplication by Two}{section.4.3}
+\BOOKMARK [2][-]{subsection.4.3.2}{Division by Two}{section.4.3}
+\BOOKMARK [1][-]{section.4.4}{Polynomial Basis Operations}{chapter.4}
+\BOOKMARK [2][-]{subsection.4.4.1}{Multiplication by x}{section.4.4}
+\BOOKMARK [2][-]{subsection.4.4.2}{Division by x}{section.4.4}
+\BOOKMARK [1][-]{section.4.5}{Powers of Two}{chapter.4}
+\BOOKMARK [2][-]{subsection.4.5.1}{Multiplication by Power of Two}{section.4.5}
+\BOOKMARK [2][-]{subsection.4.5.2}{Division by Power of Two}{section.4.5}
+\BOOKMARK [2][-]{subsection.4.5.3}{Remainder of Division by Power of Two}{section.4.5}
+\BOOKMARK [0][-]{chapter.5}{Multiplication and Squaring}{}
+\BOOKMARK [1][-]{section.5.1}{The Multipliers}{chapter.5}
+\BOOKMARK [1][-]{section.5.2}{Multiplication}{chapter.5}
+\BOOKMARK [2][-]{subsection.5.2.1}{The Baseline Multiplication}{section.5.2}
+\BOOKMARK [2][-]{subsection.5.2.2}{Faster Multiplication by the ``Comba'' Method}{section.5.2}
+\BOOKMARK [2][-]{subsection.5.2.3}{Polynomial Basis Multiplication}{section.5.2}
+\BOOKMARK [2][-]{subsection.5.2.4}{Karatsuba Multiplication}{section.5.2}
+\BOOKMARK [2][-]{subsection.5.2.5}{Toom-Cook 3-Way Multiplication}{section.5.2}
+\BOOKMARK [2][-]{subsection.5.2.6}{Signed Multiplication}{section.5.2}
+\BOOKMARK [1][-]{section.5.3}{Squaring}{chapter.5}
+\BOOKMARK [2][-]{subsection.5.3.1}{The Baseline Squaring Algorithm}{section.5.3}
+\BOOKMARK [2][-]{subsection.5.3.2}{Faster Squaring by the ``Comba'' Method}{section.5.3}
+\BOOKMARK [2][-]{subsection.5.3.3}{Polynomial Basis Squaring}{section.5.3}
+\BOOKMARK [2][-]{subsection.5.3.4}{Karatsuba Squaring}{section.5.3}
+\BOOKMARK [2][-]{subsection.5.3.5}{Toom-Cook Squaring}{section.5.3}
+\BOOKMARK [2][-]{subsection.5.3.6}{High Level Squaring}{section.5.3}
+\BOOKMARK [0][-]{chapter.6}{Modular Reduction}{}
+\BOOKMARK [1][-]{section.6.1}{Basics of Modular Reduction}{chapter.6}
+\BOOKMARK [1][-]{section.6.2}{The Barrett Reduction}{chapter.6}
+\BOOKMARK [2][-]{subsection.6.2.1}{Fixed Point Arithmetic}{section.6.2}
+\BOOKMARK [2][-]{subsection.6.2.2}{Choosing a Radix Point}{section.6.2}
+\BOOKMARK [2][-]{subsection.6.2.3}{Trimming the Quotient}{section.6.2}
+\BOOKMARK [2][-]{subsection.6.2.4}{Trimming the Residue}{section.6.2}
+\BOOKMARK [2][-]{subsection.6.2.5}{The Barrett Algorithm}{section.6.2}
+\BOOKMARK [2][-]{subsection.6.2.6}{The Barrett Setup Algorithm}{section.6.2}
+\BOOKMARK [1][-]{section.6.3}{The Montgomery Reduction}{chapter.6}
+\BOOKMARK [2][-]{subsection.6.3.1}{Digit Based Montgomery Reduction}{section.6.3}
+\BOOKMARK [2][-]{subsection.6.3.2}{Baseline Montgomery Reduction}{section.6.3}
+\BOOKMARK [2][-]{subsection.6.3.3}{Faster ``Comba'' Montgomery Reduction}{section.6.3}
+\BOOKMARK [2][-]{subsection.6.3.4}{Montgomery Setup}{section.6.3}
+\BOOKMARK [1][-]{section.6.4}{The Diminished Radix Algorithm}{chapter.6}
+\BOOKMARK [2][-]{subsection.6.4.1}{Choice of Moduli}{section.6.4}
+\BOOKMARK [2][-]{subsection.6.4.2}{Choice of k}{section.6.4}
+\BOOKMARK [2][-]{subsection.6.4.3}{Restricted Diminished Radix Reduction}{section.6.4}
+\BOOKMARK [2][-]{subsection.6.4.4}{Unrestricted Diminished Radix Reduction}{section.6.4}
+\BOOKMARK [1][-]{section.6.5}{Algorithm Comparison}{chapter.6}
+\BOOKMARK [0][-]{chapter.7}{Exponentiation}{}
+\BOOKMARK [1][-]{section.7.1}{Exponentiation Basics}{chapter.7}
+\BOOKMARK [2][-]{subsection.7.1.1}{Single Digit Exponentiation}{section.7.1}
+\BOOKMARK [1][-]{section.7.2}{k-ary Exponentiation}{chapter.7}
+\BOOKMARK [2][-]{subsection.7.2.1}{Optimal Values of k}{section.7.2}
+\BOOKMARK [2][-]{subsection.7.2.2}{Sliding-Window Exponentiation}{section.7.2}
+\BOOKMARK [1][-]{section.7.3}{Modular Exponentiation}{chapter.7}
+\BOOKMARK [2][-]{subsection.7.3.1}{Barrett Modular Exponentiation}{section.7.3}
+\BOOKMARK [1][-]{section.7.4}{Quick Power of Two}{chapter.7}
+\BOOKMARK [0][-]{chapter.8}{Higher Level Algorithms}{}
+\BOOKMARK [1][-]{section.8.1}{Integer Division with Remainder}{chapter.8}
+\BOOKMARK [2][-]{subsection.8.1.1}{Quotient Estimation}{section.8.1}
+\BOOKMARK [2][-]{subsection.8.1.2}{Normalized Integers}{section.8.1}
+\BOOKMARK [2][-]{subsection.8.1.3}{Radix- Division with Remainder}{section.8.1}
+\BOOKMARK [1][-]{section.8.2}{Single Digit Helpers}{chapter.8}
+\BOOKMARK [2][-]{subsection.8.2.1}{Single Digit Addition and Subtraction}{section.8.2}
+\BOOKMARK [2][-]{subsection.8.2.2}{Single Digit Multiplication}{section.8.2}
+\BOOKMARK [2][-]{subsection.8.2.3}{Single Digit Division}{section.8.2}
+\BOOKMARK [2][-]{subsection.8.2.4}{Single Digit Root Extraction}{section.8.2}
+\BOOKMARK [1][-]{section.8.3}{Random Number Generation}{chapter.8}
+\BOOKMARK [1][-]{section.8.4}{Formatted Representations}{chapter.8}
+\BOOKMARK [2][-]{subsection.8.4.1}{Reading Radix-n Input}{section.8.4}
+\BOOKMARK [2][-]{subsection.8.4.2}{Generating Radix-n Output}{section.8.4}
+\BOOKMARK [0][-]{chapter.9}{Number Theoretic Algorithms}{}
+\BOOKMARK [1][-]{section.9.1}{Greatest Common Divisor}{chapter.9}
+\BOOKMARK [2][-]{subsection.9.1.1}{Complete Greatest Common Divisor}{section.9.1}
+\BOOKMARK [1][-]{section.9.2}{Least Common Multiple}{chapter.9}
+\BOOKMARK [1][-]{section.9.3}{Jacobi Symbol Computation}{chapter.9}
+\BOOKMARK [2][-]{subsection.9.3.1}{Jacobi Symbol}{section.9.3}
+\BOOKMARK [1][-]{section.9.4}{Modular Inverse}{chapter.9}
+\BOOKMARK [2][-]{subsection.9.4.1}{General Case}{section.9.4}
+\BOOKMARK [1][-]{section.9.5}{Primality Tests}{chapter.9}
+\BOOKMARK [2][-]{subsection.9.5.1}{Trial Division}{section.9.5}
+\BOOKMARK [2][-]{subsection.9.5.2}{The Fermat Test}{section.9.5}
+\BOOKMARK [2][-]{subsection.9.5.3}{The Miller-Rabin Test}{section.9.5}
============================================================
# tommath.pdf is binary
============================================================
--- tommath.src 5d6ecf084c52bb30161a65e0db3f71b3a0fb4882
+++ tommath.src 5d6ecf084c52bb30161a65e0db3f71b3a0fb4882
@@ -0,0 +1,6287 @@
+\documentclass[b5paper]{book}
+\usepackage{hyperref}
+\usepackage{makeidx}
+\usepackage{amssymb}
+\usepackage{color}
+\usepackage{alltt}
+\usepackage{graphicx}
+\usepackage{layout}
+\def\union{\cup}
+\def\intersect{\cap}
+\def\getsrandom{\stackrel{\rm R}{\gets}}
+\def\cross{\times}
+\def\cat{\hspace{0.5em} \| \hspace{0.5em}}
+\def\catn{$\|$}
+\def\divides{\hspace{0.3em} | \hspace{0.3em}}
+\def\nequiv{\not\equiv}
+\def\approx{\raisebox{0.2ex}{\mbox{\small $\sim$}}}
+\def\lcm{{\rm lcm}}
+\def\gcd{{\rm gcd}}
+\def\log{{\rm log}}
+\def\ord{{\rm ord}}
+\def\abs{{\mathit abs}}
+\def\rep{{\mathit rep}}
+\def\mod{{\mathit\ mod\ }}
+\renewcommand{\pmod}[1]{\ ({\rm mod\ }{#1})}
+\newcommand{\floor}[1]{\left\lfloor{#1}\right\rfloor}
+\newcommand{\ceil}[1]{\left\lceil{#1}\right\rceil}
+\def\Or{{\rm\ or\ }}
+\def\And{{\rm\ and\ }}
+\def\iff{\hspace{1em}\Longleftrightarrow\hspace{1em}}
+\def\implies{\Rightarrow}
+\def\undefined{{\rm ``undefined"}}
+\def\Proof{\vspace{1ex}\noindent {\bf Proof:}\hspace{1em}}
+\let\oldphi\phi
+\def\phi{\varphi}
+\def\Pr{{\rm Pr}}
+\newcommand{\str}[1]{{\mathbf{#1}}}
+\def\F{{\mathbb F}}
+\def\N{{\mathbb N}}
+\def\Z{{\mathbb Z}}
+\def\R{{\mathbb R}}
+\def\C{{\mathbb C}}
+\def\Q{{\mathbb Q}}
+\definecolor{DGray}{gray}{0.5}
+\newcommand{\emailaddr}[1]{\mbox{$<${#1}$>$}}
+\def\twiddle{\raisebox{0.3ex}{\mbox{\tiny $\sim$}}}
+\def\gap{\vspace{0.5ex}}
+\makeindex
+\begin{document}
+\frontmatter
+\pagestyle{empty}
+\title{Implementing Multiple Precision Arithmetic \\ ~ \\ Draft Edition }
+\author{\mbox{
+%\begin{small}
+\begin{tabular}{c}
+Tom St Denis \\
+Algonquin College \\
+\\
+Mads Rasmussen \\
+Open Communications Security \\
+\\
+Greg Rose \\
+QUALCOMM Australia \\
+\end{tabular}
+%\end{small}
+}
+}
+\maketitle
+This text has been placed in the public domain. This text corresponds to the v0.30 release of the
+LibTomMath project.
+
+\begin{alltt}
+Tom St Denis
+111 Banning Rd
+Ottawa, Ontario
+K2L 1C3
+Canada
+
+Phone: 1-613-836-3160
+Email: tomstdenis@iahu.ca
+\end{alltt}
+
+This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{}
+{\em book} macro package and the Perl {\em booker} package.
+
+\tableofcontents
+\listoffigures
+\chapter*{Prefaces to the Draft Edition}
+I started this text in April 2003 to complement my LibTomMath library. That is, explain how to implement the functions
+contained in LibTomMath. The goal is to have a textbook that any Computer Science student can use when implementing their
+own multiple precision arithmetic. The plan I wanted to follow was to flesh out all the
+ideas and concepts I had floating around in my head and then work on it afterwards refining a little bit at a time. Chance
+would have it that I ended up with my summer off from Algonquin College and I was given four months solid to work on the
+text.
+
+Choosing to not waste any time I dove right into the project even before my spring semester was finished. I wrote a bit
+off and on at first. The moment my exams were finished I jumped into long 12 to 16 hour days. The result after only
+a couple of months was a ten chapter, three hundred page draft that I quickly had distributed to anyone who wanted
+to read it. I had Jean-Luc Cooke print copies for me and I brought them to Crypto'03 in Santa Barbara. So far I have
+managed to grab a certain level of attention; having people from around the world ask me for copies of the text was certainly
+rewarding.
+
+Now we are past December 2003. By this time I had pictured that I would have at least finished my second draft of the text.
+Currently I am far off from this goal. I've done partial re-writes of chapters one, two and three but they are not even
+finished yet. I haven't given up on the project, only had some setbacks. First O'Reilly declined to publish the text then
+Addison-Wesley and Greg is trying another which I don't know the name of. However, at this point I want to focus my energy
+onto finishing the book not securing a contract.
+
+So why am I writing this text? It seems like a lot of work right? Most certainly it is a lot of work writing a textbook.
+Even the simplest introductory material has to be lined with references and figures. A lot of the text has to be re-written
+from point form to prose form to ensure an easier read. Why am I doing all this work for free then? Simple. My philosophy
+is quite simply ``Open Source. Open Academia. Open Minds'' which means that to achieve a goal of open minds, that is,
+people willing to accept new ideas and explore the unknown you have to make available material they can access freely
+without hindrance.
+
+I've been writing free software since I was about sixteen but only recently have I hit upon software that people have come
+to depend upon. I started LibTomCrypt in December 2001 and now several major companies use it as integral portions of their
+software. Several educational institutions use it as a matter of course and many freelance developers use it as
+part of their projects. To further my contributions I started the LibTomMath project in December 2002 aimed at providing
+multiple precision arithmetic routines that students could learn from. That is, to write routines that are not only easy
+to understand and follow but provide quite impressive performance considering they are all in standard portable ISO C.
+
+The second leg of my philosophy is ``Open Academia'' which is where this textbook comes in. In the end, when all is
+said and done the text will be useable by educational institutions as a reference on multiple precision arithmetic.
+
+At this time I feel I should share a little information about myself. The most common question I was asked at
+Crypto'03, perhaps just out of professional courtesy, was which school I either taught at or attended. The unfortunate
+truth is that I neither teach at nor attend a school of academic reputation. I'm currently at Algonquin College which
+is what I'd like to call a ``somewhat academic but mostly vocational'' college. In other words, job training.
+
+I'm a 21 year old computer science student mostly self-taught in the areas I am aware of (which includes a half-dozen
+computer science fields, a few fields of mathematics and some English). I look forward to teaching someday but I am
+still far off from that goal.
+
+Now it would be improper for me to not introduce the rest of the text's co-authors. While they are only contributing
+corrections and editorial feedback their support has been tremendously helpful in presenting the concepts laid out
+in the text so far. Greg has always been there for me. He has tracked my LibTom projects since their inception and even
+sent cheques to help pay tuition from time to time. His background has provided a wonderful source to bounce ideas off
+of and improve the quality of my writing. Mads is another fellow who has just ``been there''. I don't even recall what
+his interest in the LibTom projects is but I'm definitely glad he has been around. His ability to catch logical errors
+in my written English has saved me on several occasions to say the least.
+
+What to expect next? Well this is still a rough draft. I've only had the chance to update a few chapters. However, I've
+been getting the feeling that people are starting to use my text and I owe them some updated material. My current tentative
+plan is to edit one chapter every two weeks starting January 4th. It seems insane but my lower course load at college
+should provide ample time. By Crypto'04 I plan to have a 2nd draft of the text polished and ready to hand out to as many
+people as will take it.
+
+\begin{flushright} Tom St Denis \end{flushright}
+
+\newpage
+I found the opportunity to work with Tom appealing for several reasons, not only could I broaden my own horizons, but also
+contribute to educating others facing the problem of having to handle big number mathematical calculations.
+
+This book is Tom's child and he has been caring and fostering the project ever since the beginning with a clear mind of
+how he wanted the project to turn out. I have helped by proofreading the text and we have had several discussions about
+the layout and language used.
+
+I hold a masters degree in cryptography from the University of Southern Denmark and have always been interested in the
+practical aspects of cryptography.
+
+Having worked in the security consultancy business for several years in S\~{a}o Paulo, Brazil, I have been in touch with a
+great deal of work in which multiple precision mathematics was needed. Understanding the possibilities for speeding up
+multiple precision calculations is often very important since we deal with outdated machine architecture where modular
+reductions, for example, become painfully slow.
+
+This text is for people who stop and wonder when examining algorithms such as RSA for the first time and ask
+themselves, ``You tell me this is only secure for large numbers, fine; but how do you implement these numbers?''
+
+\begin{flushright}
+Mads Rasmussen
+
+S\~{a}o Paulo - SP
+
+Brazil
+\end{flushright}
+
+\newpage
+It's all because I broke my leg. That just happened to be at about the same time that Tom asked for someone to review the section of the book about
+Karatsuba multiplication. I was laid up, alone and immobile, and thought ``Why not?'' I vaguely knew what Karatsuba multiplication was, but not
+really, so I thought I could help, learn, and stop myself from watching daytime cable TV, all at once.
+
+At the time of writing this, I've still not met Tom or Mads in meatspace. I've been following Tom's progress since his first splash on the
+sci.crypt Usenet news group. I watched him go from a clueless newbie, to the cryptographic equivalent of a reformed smoker, to a real
+contributor to the field, over a period of about two years. I've been impressed with his obvious intelligence, and astounded by his productivity.
+Of course, he's young enough to be my own child, so he doesn't have my problems with staying awake.
+
+When I reviewed that single section of the book, in its very earliest form, I was very pleasantly surprised. So I decided to collaborate more fully,
+and at least review all of it, and perhaps write some bits too. There's still a long way to go with it, and I have watched a number of close
+friends go through the mill of publication, so I think that the way to go is longer than Tom thinks it is. Nevertheless, it's a good effort,
+and I'm pleased to be involved with it.
+
+\begin{flushright}
+Greg Rose, Sydney, Australia, June 2003.
+\end{flushright}
+
+\mainmatter
+\pagestyle{headings}
+\chapter{Introduction}
+\section{Multiple Precision Arithmetic}
+
+\subsection{What is Multiple Precision Arithmetic?}
+When we think of long-hand arithmetic such as addition or multiplication we rarely consider the fact that we instinctively
+raise or lower the precision of the numbers we are dealing with. For example, in decimal we can almost immediately
+reason that $7$ times $6$ is $42$. However, $42$ has two digits of precision as opposed to one digit we started with.
+Further multiplications of say $3$ result in a larger precision result $126$. In these few examples we have multiple
+precisions for the numbers we are working with. Despite the various levels of precision a single subset\footnote{With the occasional optimization.}
+ of algorithms can be designed to accommodate them.
+
+By way of comparison a fixed or single precision operation would lose precision on various operations. For example, in
+the decimal system with fixed precision $6 \cdot 7 = 2$.
+
+Essentially at the heart of computer based multiple precision arithmetic are the same long-hand algorithms taught in
+schools to manually add, subtract, multiply and divide.
+
+\subsection{The Need for Multiple Precision Arithmetic}
+The most prevalent need for multiple precision arithmetic, often referred to as ``bignum'' math, is within the implementation
+of public-key cryptography algorithms. Algorithms such as RSA \cite{RSAREF} and Diffie-Hellman \cite{DHREF} require
+integers of significant magnitude to resist known cryptanalytic attacks. For example, at the time of this writing a
+typical RSA modulus would be at least greater than $10^{309}$. However, modern programming languages such as ISO C \cite{ISOC} and
+Java \cite{JAVA} only provide intrinsic support for integers which are relatively small and single precision.
+
+\begin{figure}[!here]
+\begin{center}
+\begin{tabular}{|r|c|}
+\hline \textbf{Data Type} & \textbf{Range} \\
+\hline char & $-128 \ldots 127$ \\
+\hline short & $-32768 \ldots 32767$ \\
+\hline long & $-2147483648 \ldots 2147483647$ \\
+\hline long long & $-9223372036854775808 \ldots 9223372036854775807$ \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Typical Data Types for the C Programming Language}
+\label{fig:ISOC}
+\end{figure}
+
+The largest data type guaranteed to be provided by the ISO C programming
+language\footnote{As per the ISO C standard. However, each compiler vendor is allowed to augment the precision as they
+see fit.} can only represent values up to $10^{19}$ as shown in figure \ref{fig:ISOC}. On its own the C language is
+insufficient to accommodate the magnitude required for the problem at hand. An RSA modulus of magnitude $10^{19}$ could be
+trivially factored\footnote{A Pollard-Rho factoring would take only $2^{16}$ time.} on the average desktop computer,
+rendering any protocol based on the algorithm insecure. Multiple precision algorithms solve this very problem by
+extending the range of representable integers while using single precision data types.
+
+Most advancements in fast multiple precision arithmetic stem from the need for faster and more efficient cryptographic
+primitives. Faster modular reduction and exponentiation algorithms such as Barrett's algorithm, which have appeared in
+various cryptographic journals, can render algorithms such as RSA and Diffie-Hellman more efficient. In fact, several
+major companies such as RSA Security, Certicom and Entrust have built entire product lines on the implementation and
+deployment of efficient algorithms.
+
+However, cryptography is not the only field of study that can benefit from fast multiple precision integer routines.
+Another auxiliary use of multiple precision integers is high precision floating point data types.
+The basic IEEE \cite{IEEE} standard floating point type is made up of an integer mantissa $q$, an exponent $e$ and a sign bit $s$.
+Numbers are given in the form $n = q \cdot b^e \cdot (-1)^s$ where $b = 2$ is the most common base for IEEE. Since IEEE
+floating point is meant to be implemented in hardware the precision of the mantissa is often fairly small
+(\textit{23, 52 and 64 bits}). The mantissa is merely an integer and a multiple precision integer could be used to create
+a mantissa of much larger precision than hardware alone can efficiently support. This approach could be useful where
+scientific applications must minimize the total output error over long calculations.
+
+Another use for large integers is within arithmetic on polynomials of large characteristic (i.e. $GF(p)[x]$ for large $p$).
+In fact the library discussed within this text has already been used to form a polynomial basis library\footnote{See \url{http://poly.libtomcrypt.org} for more details.}.
+
+\subsection{Benefits of Multiple Precision Arithmetic}
+\index{precision}
+The benefit of multiple precision representations over single or fixed precision representations is that
+no precision is lost while representing the result of an operation which requires excess precision. For example,
+the product of two $n$-bit integers requires at least $2n$ bits of precision to be represented faithfully. A multiple
+precision algorithm would augment the precision of the destination to accommodate the result while a single precision system
+would truncate excess bits to maintain a fixed level of precision.
+
+It is possible to implement algorithms which require large integers with fixed precision algorithms. For example, elliptic
+curve cryptography (\textit{ECC}) is often implemented on smartcards by fixing the precision of the integers to the maximum
+size the system will ever need. Such an approach can lead to vastly simpler algorithms which can accommodate the
+integers required even if the host platform cannot natively accommodate them\footnote{For example, the average smartcard
+processor has an 8 bit accumulator.}. However, as efficient as such an approach may be, the resulting source code is not
+normally very flexible. It cannot, at runtime, accommodate inputs of higher magnitude than the designer anticipated.
+
+Multiple precision algorithms have the most overhead of any style of arithmetic. For the most part the
+overhead can be kept to a minimum with careful planning, but overall, it is not well suited for most memory starved
+platforms. However, multiple precision algorithms do offer the most flexibility in terms of the magnitude of the
+inputs. That is, the same algorithms based on multiple precision integers can accommodate any reasonable size input
+without the designer's explicit forethought. This leads to lower cost of ownership for the code as it only has to
+be written and tested once.
+
+\section{Purpose of This Text}
+The purpose of this text is to instruct the reader regarding how to implement efficient multiple precision algorithms.
+That is to not only explain a limited subset of the core theory behind the algorithms but also the various ``house keeping''
+elements that are neglected by authors of other texts on the subject. Several well renowned texts \cite{TAOCPV2,HAC}
+give considerably detailed explanations of the theoretical aspects of algorithms and often very little information
+regarding the practical implementation aspects.
+
+In most cases how an algorithm is explained and how it is actually implemented are two very different concepts. For
+example, the Handbook of Applied Cryptography (\textit{HAC}), algorithm 14.7 on page 594, gives a relatively simple
+algorithm for performing multiple precision integer addition. However, the description lacks any discussion concerning
+the fact that the two integer inputs may be of differing magnitudes. As a result the implementation is not as simple
+as the text would lead people to believe. Similarly the division routine (\textit{algorithm 14.20, pp. 598}) does not
+discuss how to handle sign or handle the dividend's decreasing magnitude in the main loop (\textit{step \#3}).
+
+Both texts also do not discuss several key optimal algorithms required such as ``Comba'' and Karatsuba multipliers
+and fast modular inversion, which we consider practical oversights. These optimal algorithms are vital to achieve
+any form of useful performance in non-trivial applications.
+
+To solve this problem the focus of this text is on the practical aspects of implementing a multiple precision integer
+package. As a case study the ``LibTomMath''\footnote{Available at \url{http://math.libtomcrypt.org}} package is used
+to demonstrate algorithms with real implementations\footnote{In the ISO C programming language.} that have been field
+tested and work very well. The LibTomMath library is freely available on the Internet for all uses and this text
+discusses a very large portion of the inner workings of the library.
+
+The algorithms that are presented will always include at least one ``pseudo-code'' description followed
+by the actual C source code that implements the algorithm. The pseudo-code can be used to implement the same
+algorithm in other programming languages as the reader sees fit.
+
+This text shall also serve as a walkthrough of the creation of multiple precision algorithms from scratch, showing
+the reader how the algorithms fit together as well as where to start on various tasks.
+
+\section{Discussion and Notation}
+\subsection{Notation}
+A multiple precision integer of $n$-digits shall be denoted as $x = (x_{n-1} ... x_1 x_0)_{ \beta }$ and represent
+the integer $x \equiv \sum_{i=0}^{n-1} x_i\beta^i$. The elements of the array $x$ are said to be the radix $\beta$ digits
+of the integer. For example, $x = (1,2,3)_{10}$ would represent the integer
+$1\cdot 10^2 + 2\cdot10^1 + 3\cdot10^0 = 123$.
+
+\index{mp\_int}
+The term ``mp\_int'' shall refer to a composite structure which contains the digits of the integer it represents, as well
+as auxiliary data required to manipulate the data. These additional members are discussed further in section
+\ref{sec:MPINT}. For the purposes of this text a ``multiple precision integer'' and an ``mp\_int'' are assumed to be
+synonymous. When an algorithm is specified to accept an mp\_int variable it is assumed the various auxiliary data members
+are present as well. An expression of the type \textit{variablename.item} implies that it should evaluate to the
+member named ``item'' of the variable. For example, a string of characters may have a member ``length'' which would
+evaluate to the number of characters in the string. If the string $a$ equals ``hello'' then it follows that
+$a.length = 5$.
+
+For certain discussions more generic algorithms are presented to help the reader understand the final algorithm used
+to solve a given problem. When an algorithm is described as accepting an integer input it is assumed the input is
+a plain integer with no additional multiple-precision members. That is, algorithms that use integers as opposed to
+mp\_ints as inputs do not concern themselves with the housekeeping operations required such as memory management. These
+algorithms will be used to establish the relevant theory which will subsequently be used to describe a multiple
+precision algorithm to solve the same problem.
+
+\subsection{Precision Notation}
+For the purposes of this text a single precision variable must be able to represent integers in the range
+$0 \le x < q \beta$ while a double precision variable must be able to represent integers in the range
+$0 \le x < q \beta^2$. The variable $\beta$ represents the radix of a single digit of a multiple precision integer and
+must be of the form $q^p$ for $q, p \in \Z^+$. The extra radix-$q$ factor allows additions and subtractions to proceed
+without truncation of the carry. Since all modern computers are binary, it is assumed that $q$ is two, for all intents
+and purposes.
+
+\index{mp\_digit} \index{mp\_word}
+Within the source code that will be presented for each algorithm, the data type \textbf{mp\_digit} will represent
+a single precision integer type, while the data type \textbf{mp\_word} will represent a double precision integer type. In
+several algorithms (notably the Comba routines) temporary results will be stored in arrays of double precision mp\_words.
+For the purposes of this text $x_j$ will refer to the $j$'th digit of a single precision array and $\hat x_j$ will refer to
+the $j$'th digit of a double precision array. Whenever an expression is to be assigned to a double precision
+variable it is assumed that all single precision variables are promoted to double precision during the evaluation.
+Expressions that are assigned to a single precision variable are truncated to fit within the precision of a single
+precision data type.
+
+For example, if $\beta = 10^2$ a single precision data type may represent a value in the
+range $0 \le x < 10^3$, while a double precision data type may represent a value in the range $0 \le x < 10^5$. Let
+$a = 23$ and $b = 49$ represent two single precision variables. The single precision product shall be written
+as $c \leftarrow a \cdot b$ while the double precision product shall be written as $\hat c \leftarrow a \cdot b$.
+In this particular case, $\hat c = 1127$ and $c = 127$. The most significant digit of the product would not fit
+in a single precision data type and as a result $c \ne \hat c$.
+
+\subsection{Algorithm Inputs and Outputs}
+Within the algorithm descriptions all variables are assumed to be scalars of either single or double precision
+as indicated. The only exception to this rule is when variables have been indicated to be of type mp\_int. This
+distinction is important as scalars are often used as array indices and various other counters.
+
+\subsection{Mathematical Expressions}
+The $\lfloor \mbox{ } \rfloor$ brackets imply an expression truncated to an integer not greater than the expression
+itself. For example, $\lfloor 5.7 \rfloor = 5$. Similarly the $\lceil \mbox{ } \rceil$ brackets imply an expression
+rounded to an integer not less than the expression itself. For example, $\lceil 5.1 \rceil = 6$. Typically when
+the $/$ division symbol is used the intention is to perform an integer division with truncation. For example,
+$5/2 = 2$ which will often be written as $\lfloor 5/2 \rfloor = 2$ for clarity. When an expression is written as a
+fraction a real value division is implied, for example ${5 \over 2} = 2.5$.
+
+The norm of a multiple precision integer, for example, $\vert \vert x \vert \vert$ will be used to represent the number of digits in the representation
+of the integer. For example, $\vert \vert 123 \vert \vert = 3$ and $\vert \vert 79452 \vert \vert = 5$.
+
+\subsection{Work Effort}
+\index{big-Oh}
+To measure the efficiency of the specified algorithms, a modified big-Oh notation is used. In this system all
+single precision operations are considered to have the same cost\footnote{Except where explicitly noted.}.
+That is a single precision addition, multiplication and division are assumed to take the same time to
+complete. While this is generally not true in practice, it will simplify the discussions considerably.
+
+Some algorithms have slight advantages over others which is why some constants will not be removed in
+the notation. For example, a normal baseline multiplication (section \ref{sec:basemult}) requires $O(n^2)$ work while a
+baseline squaring (section \ref{sec:basesquare}) requires $O({{n^2 + n}\over 2})$ work. In standard big-Oh notation these
+would both be said to be equivalent to $O(n^2)$. However,
+in the context of this text this is not the case as the magnitude of the inputs will typically be rather small. As a
+result small constant factors in the work effort will make an observable difference in algorithm efficiency.
+
+All of the algorithms presented in this text have a polynomial time work level. That is, of the form
+$O(n^k)$ for $n, k \in \Z^{+}$. This will help make useful comparisons in terms of the speed of the algorithms and how
+various optimizations will help pay off in the long run.
+
+\section{Exercises}
+Within the more advanced chapters a section will be set aside to give the reader some challenging exercises related to
+the discussion at hand. These exercises are not designed to be prize winning problems, but instead to be thought
+provoking. Wherever possible the problems are forward minded, stating problems that will be answered in subsequent
+chapters. The reader is encouraged to finish the exercises as they appear to get a better understanding of the
+subject material.
+
+That being said, the problems are designed to affirm knowledge of a particular subject matter. Students in particular
+are encouraged to verify they can answer the problems correctly before moving on.
+
+Similar to the exercises of \cite[pp. ix]{TAOCPV2} these exercises are given a scoring system based on the difficulty of
+the problem. However, unlike \cite{TAOCPV2} the problems do not get nearly as hard. The scoring of these
+exercises ranges from one (the easiest) to five (the hardest). The following table summarizes the
+scoring system used.
+
+\begin{figure}[here]
+\begin{center}
+\begin{small}
+\begin{tabular}{|c|l|}
+\hline $\left [ 1 \right ]$ & An easy problem that should only take the reader a matter of \\
+ & minutes to solve. Usually does not involve much computer time \\
+ & to solve. \\
+\hline $\left [ 2 \right ]$ & An easy problem that involves a marginal amount of computer \\
+ & time usage. Usually requires a program to be written to \\
+ & solve the problem. \\
+\hline $\left [ 3 \right ]$ & A moderately hard problem that requires a non-trivial amount \\
+ & of work. Usually involves trivial research and development of \\
+ & new theory from the perspective of a student. \\
+\hline $\left [ 4 \right ]$ & A moderately hard problem that involves a non-trivial amount \\
+ & of work and research, the solution to which will demonstrate \\
+ & a higher mastery of the subject matter. \\
+\hline $\left [ 5 \right ]$ & A hard problem that involves concepts that are difficult for a \\
+ & novice to solve. Solutions to these problems will demonstrate a \\
+ & complete mastery of the given subject. \\
+\hline
+\end{tabular}
+\end{small}
+\end{center}
+\caption{Exercise Scoring System}
+\end{figure}
+
+Problems at the first level are meant to be simple questions that the reader can answer quickly without programming a solution or
+devising new theory. These problems are quick tests to see if the material is understood. Problems at the second level
+are also designed to be easy but will require a program or algorithm to be implemented to arrive at the answer. These
+two levels are essentially entry level questions.
+
+Problems at the third level are meant to be a bit more difficult than the first two levels. The answer is often
+fairly obvious but arriving at an exacting solution requires some thought and skill. These problems will almost always
+involve devising a new algorithm or implementing a variation of another algorithm previously presented. Readers who can
+answer these questions will feel comfortable with the concepts behind the topic at hand.
+
+Problems at the fourth level are meant to be similar to those of the level three questions except they will require
+additional research to be completed. The reader will most likely not know the answer right away, nor will the text provide
+the exact details of the answer until a subsequent chapter.
+
+Problems at the fifth level are meant to be the hardest
+problems relative to all the other problems in the chapter. People who can correctly answer fifth level problems have a
+mastery of the subject matter at hand.
+
+Often problems will be tied together. The purpose of this is to start a chain of thought that will be discussed in future chapters. The reader
+is encouraged to answer the follow-up problems and try to draw the relevance of problems.
+
+\section{Introduction to LibTomMath}
+
+\subsection{What is LibTomMath?}
+LibTomMath is a free and open source multiple precision integer library written entirely in portable ISO C. By portable it
+is meant that the library does not contain any code that is computer platform dependent or otherwise problematic to use on
+any given platform.
+
+The library has been successfully tested under numerous operating systems including Unix\footnote{All of these
+trademarks belong to their respective rightful owners.}, MacOS, Windows, Linux, PalmOS and on standalone hardware such
+as the Gameboy Advance. The library is designed to contain enough functionality to be able to develop applications such
+as public key cryptosystems and still maintain a relatively small footprint.
+
+\subsection{Goals of LibTomMath}
+
+Libraries which obtain the most efficiency are rarely written in a high level programming language such as C. However,
+even though this library is written entirely in ISO C, considerable care has been taken to optimize the algorithm implementations within the
+library. Specifically the code has been written to work well with the GNU C Compiler (\textit{GCC}) on both x86 and ARM
+processors. Wherever possible, highly efficient algorithms, such as Karatsuba multiplication, sliding window
+exponentiation and Montgomery reduction have been provided to make the library more efficient.
+
+Even with the nearly optimal and specialized algorithms that have been included the Application Programming Interface
+(\textit{API}) has been kept as simple as possible. Often generic place holder routines will make use of specialized
+algorithms automatically without the developer's specific attention. One such example is the generic multiplication
+algorithm \textbf{mp\_mul()} which will automatically use Toom--Cook, Karatsuba, Comba or baseline multiplication
+based on the magnitude of the inputs and the configuration of the library.
+
+Making LibTomMath as efficient as possible is not the only goal of the LibTomMath project. Ideally the library should
+be source compatible with another popular library which makes it more attractive for developers to use. In this case the
+MPI library was used as an API template for all the basic functions. MPI was chosen because it is another library that fits
+in the same niche as LibTomMath. Even though LibTomMath uses MPI as the template for the function names and argument
+passing conventions, it has been written from scratch by Tom St Denis.
+
+The project is also meant to act as a learning tool for students, the logic being that no easy-to-follow ``bignum''
+library exists which can be used to teach computer science students how to perform fast and reliable multiple precision
+integer arithmetic. To this end the source code has been given quite a few comments and algorithm discussion points.
+
+\section{Choice of LibTomMath}
+LibTomMath was chosen as the case study of this text not only because the author of both projects is one and the same but
+for more worthy reasons. Other libraries such as GMP \cite{GMP}, MPI \cite{MPI}, LIP \cite{LIP} and OpenSSL
+\cite{OPENSSL} have multiple precision integer arithmetic routines but would not be ideal for this text for
+reasons that will be explained in the following sub-sections.
+
+\subsection{Code Base}
+The LibTomMath code base is all portable ISO C source code. This means that there are no platform dependent conditional
+segments of code littered throughout the source. This clean and uncluttered approach to the library means that a
+developer can more readily discern the true intent of a given section of source code without trying to keep track of
+what conditional code will be used.
+
+The code base of LibTomMath is well organized. Each function is in its own separate source code file
+which allows the reader to find a given function very quickly. On average there are $76$ lines of code per source
+file which makes the source very easy to follow. By comparison MPI and LIP are single file projects making code tracing
+very hard. GMP has many conditional code segments which also hinder tracing.
+
+When compiled with GCC for the x86 processor and optimized for speed the entire library is approximately $100$KiB\footnote{The notation ``KiB'' means $2^{10}$ octets, similarly ``MiB'' means $2^{20}$ octets.}
+ which is fairly small compared to GMP (over $250$KiB). LibTomMath is slightly larger than MPI (which compiles to about
+$50$KiB) but LibTomMath is also much faster and more complete than MPI.
+
+\subsection{API Simplicity}
+LibTomMath is designed after the MPI library and shares the API design. Quite often programs that use MPI will build
+with LibTomMath without change. The function names correlate directly to the action they perform. Almost all of the
+functions share the same parameter passing convention. The learning curve is fairly shallow with the API provided
+which is an extremely valuable benefit for the student and developer alike.
+
+The LIP library is an example of a library with an API that is awkward to work with. LIP uses function names that are often ``compressed'' to
+illegible short hand. LibTomMath does not share this characteristic.
+
+The GMP library also does not return error codes. Instead it uses a POSIX.1 \cite{POSIX1} signal system where errors
+are signaled to the host application. This happens to be the fastest approach but definitely not the most versatile. In
+effect a math error (e.g. invalid input, heap error, etc.) can cause a program to stop functioning which is definitely
+undesirable in many situations.
+
+\subsection{Optimizations}
+While LibTomMath is certainly not the fastest library (GMP often beats LibTomMath by a factor of two) it does
+feature a set of optimal algorithms for tasks such as modular reduction, exponentiation, multiplication and squaring. GMP
+and LIP also feature such optimizations while MPI only uses baseline algorithms with no optimizations. GMP lacks a few
+of the additional modular reduction optimizations that LibTomMath features\footnote{At the time of this writing GMP
+only had Barrett and Montgomery modular reduction algorithms.}.
+
+LibTomMath is almost always an order of magnitude faster than the MPI library at computationally expensive tasks such as modular
+exponentiation. In the grand scheme of ``bignum'' libraries LibTomMath is faster than the average library and usually
+slower than the best libraries such as GMP and OpenSSL by only a small factor.
+
+\subsection{Portability and Stability}
+LibTomMath will build ``out of the box'' on any platform equipped with a modern version of the GNU C Compiler
+(\textit{GCC}). This means that without changes the library will build without configuration or setting up any
+variables. LIP and MPI will build ``out of the box'' as well but have numerous known bugs. Most notably the author of
+MPI has recently stopped working on his library and LIP has long since been discontinued.
+
+GMP requires a configuration script to run and will not build out of the box. GMP and LibTomMath are still in active
+development and are very stable across a variety of platforms.
+
+\subsection{Choice}
+LibTomMath is a relatively compact, well documented, highly optimized and portable library which seems only natural for
+the case study of this text. Various source files from the LibTomMath project will be included within the text. However,
+the reader is encouraged to download their own copy of the library to actually be able to work with the library.
+
+\chapter{Getting Started}
+\section{Library Basics}
+The trick to writing any useful library of source code is to build a solid foundation and work outwards from it. First,
+a problem along with allowable solution parameters should be identified and analyzed. In this particular case the
+inability to accommodate multiple precision integers is the problem. Furthermore, the solution must be written
+as portable source code that is reasonably efficient across several different computer platforms.
+
+After a foundation is formed the remainder of the library can be designed and implemented in a hierarchical fashion.
+That is, to implement the lowest level dependencies first and work towards the most abstract functions last. For example,
+before implementing a modular exponentiation algorithm one would implement a modular reduction algorithm.
+By building outwards from a base foundation instead of using a parallel design methodology the resulting project is
+highly modular. Being highly modular is a desirable property of any project as it often means the resulting product
+has a small footprint and updates are easy to perform.
+
+Usually when I start a project I will begin with the header file. I define the data types I think I will need and
+prototype the initial functions that are not dependent on other functions (within the library). After I
+implement these base functions I prototype more dependent functions and implement them. The process repeats until
+I implement all of the functions I require. For example, in the case of LibTomMath I implemented functions such as
+mp\_init() well before I implemented mp\_mul() and even further before I implemented mp\_exptmod(). As an example as to
+why this design works note that the Karatsuba and Toom--Cook multipliers were written \textit{after} the
+dependent function mp\_exptmod() was written. Adding the new multiplication algorithms did not require changes to the
+mp\_exptmod() function itself and lowered the total cost of ownership (\textit{so to speak}) and of development
+for new algorithms. This methodology allows new algorithms to be tested in a complete framework with relative ease.
+
+FIGU,design_process,Design Flow of the First Few Original LibTomMath Functions.
+
+Only after the majority of the functions were in place did I pursue a less hierarchical approach to auditing and optimizing
+the source code. For example, one day I may audit the multipliers and the next day the polynomial basis functions.
+
+It only makes sense to begin the text with the preliminary data types and support algorithms required as well.
+This chapter discusses the core algorithms of the library which are the dependencies of every other algorithm.
+
+\section{What is a Multiple Precision Integer?}
+Recall that most programming languages, in particular ISO C \cite{ISOC}, only have fixed precision data types that on their own cannot
+be used to represent values larger than their precision will allow. The purpose of multiple precision algorithms is
+to use fixed precision data types to create and manipulate multiple precision integers which may represent values
+that are very large.
+
+As a well known analogy, school children are taught how to form numbers larger than nine by prepending more radix ten digits. In the decimal system
+the largest single digit value is $9$. However, by concatenating digits together larger numbers may be represented. Newly prepended digits
+(\textit{to the left}) are said to be in a different power of ten column. That is, the number $123$ can be described as having a $1$ in the hundreds
+column, $2$ in the tens column and $3$ in the ones column. Or more formally $123 = 1 \cdot 10^2 + 2 \cdot 10^1 + 3 \cdot 10^0$. Computer based
+multiple precision arithmetic is essentially the same concept. Larger integers are represented by adjoining fixed
+precision computer words with the exception that a different radix is used.
+
+What most people probably do not think about explicitly are the various other attributes that describe a multiple precision
+integer. For example, the integer $154_{10}$ has two immediately obvious properties. First, the integer is positive,
+that is the sign of this particular integer is positive as opposed to negative. Second, the integer has three digits in
+its representation. There is an additional property that the integer possesses that does not concern pencil-and-paper
+arithmetic. The third property is how many digit placeholders are available to hold the integer.
+
+The human analogy of this third property is ensuring there is enough space on the paper to write the integer. For example,
+if one starts writing a large number too far to the right on a piece of paper they will have to erase it and move left.
+Similarly, computer algorithms must maintain strict control over memory usage to ensure that the digits of an integer
+will not exceed the allowed boundaries. These three properties make up what is known as a multiple precision
+integer or mp\_int for short.
+
+\subsection{The mp\_int Structure}
+\label{sec:MPINT}
+The mp\_int structure is the ISO C based manifestation of what represents a multiple precision integer. The ISO C standard does not provide for
+any such data type but it does provide for making composite data types known as structures. The following is the structure definition
+used within LibTomMath.
+
+\index{mp\_int}
+\begin{verbatim}
+typedef struct {
+ int used, alloc, sign;
+ mp_digit *dp;
+} mp_int;
+\end{verbatim}
+
+The mp\_int structure can be broken down as follows.
+
+\begin{enumerate}
+\item The \textbf{used} parameter denotes how many digits of the array \textbf{dp} contain the digits used to represent
+a given integer. The \textbf{used} count must be positive (or zero) and may not exceed the \textbf{alloc} count.
+
+\item The \textbf{alloc} parameter denotes how
+many digits are available in the array to use by functions before it has to increase in size. When the \textbf{used} count
+of a result would exceed the \textbf{alloc} count all of the algorithms will automatically increase the size of the
+array to accommodate the precision of the result.
+
+\item The pointer \textbf{dp} points to a dynamically allocated array of digits that represent the given multiple
+precision integer. It is padded with $(\textbf{alloc} - \textbf{used})$ zero digits. The array is maintained in a least
+significant digit order. As a pencil and paper analogy the array is organized such that the right most digits are stored
+first starting at the location indexed by zero\footnote{In C all arrays begin at zero.} in the array.