The unified diff between revisions [92f17752..] and [4e40e885..] is displayed below. It can also be downloaded as a raw diff.

This diff has been restricted to the following files: 'src/charset.cpp'

#
#
# patch "src/charset.cpp"
#  from [2e6502139430d9ebc56d754dee9b181a6aed8ab0]
#    to [ed45191ce62ae8c6bd98fa8cb040d82b1cd386ee]
#
============================================================
--- src/charset.cpp	2e6502139430d9ebc56d754dee9b181a6aed8ab0
+++ src/charset.cpp	ed45191ce62ae8c6bd98fa8cb040d82b1cd386ee
@@ -1,25 +1,124 @@
 /*************************************************
 * Character Set Handling Source File             *
-* (C) 1999-2006 The Botan Project                *
+* (C) 1999-2007 Jack Lloyd                       *
 *************************************************/

 #include <botan/charset.h>
 #include <botan/hex.h>
 #include <botan/base64.h>
-#include <botan/libstate.h>
+#include <botan/parsing.h>
 #include <cctype>

 namespace Botan {

 namespace Charset {

+namespace {
+
 /*************************************************
+* Convert from UCS-2 to ISO 8859-1               *
+*************************************************/
+std::string ucs2_to_latin1(const std::string& ucs2)
+   {
+   // Each UCS-2 code unit is a byte pair, so reject a dangling byte
+   if((ucs2.size() & 1) != 0)
+      throw Decoding_Error("UCS-2 string has an odd number of bytes");
+
+   std::string result;
+   for(u32bit pos = 0; pos < ucs2.size(); pos += 2)
+      {
+      // The first byte of each pair is treated as the high-order byte
+      const byte high = ucs2[pos];
+      const byte low = ucs2[pos+1];
+      // Only pairs whose first byte is zero are accepted as Latin1
+      if(high != 0)
+         throw Decoding_Error("UCS-2 has non-Latin1 characters");
+      result += static_cast<char>(low);
+      }
+
+   return result;
+   }
+
+/*************************************************
+* Convert from UTF-8 to ISO 8859-1               *
+*************************************************/
+std::string utf8_to_latin1(const std::string& utf8)
+   {
+   // Throws Decoding_Error on malformed input or code points above U+00FF
+   std::string iso8859;
+   u32bit position = 0;
+   while(position != utf8.size())
+      {
+      const byte c1 = static_cast<byte>(utf8[position++]);
+      if(c1 <= 0x7F)
+         iso8859 += static_cast<char>(c1);
+      else if(c1 >= 0xC0 && c1 <= 0xC3)
+         {
+         // Lead bytes above 0xC3 encode U+0100 and up, which Latin1 lacks
+         if(position == utf8.size())
+            throw Decoding_Error("UTF-8: sequence truncated");
+         // A continuation byte must have the bit pattern 10xxxxxx
+         const byte c2 = static_cast<byte>(utf8[position++]);
+         if((c2 & 0xC0) != 0x80)
+            throw Decoding_Error("UTF-8: invalid continuation byte");
+         // Lead bytes 0xC0/0xC1 give values below 0x80 (overlong forms)
+         const byte iso_char = ((c1 & 0x03) << 6) | (c2 & 0x3F);
+         if(iso_char <= 0x7F)
+            throw Decoding_Error("UTF-8: sequence longer than needed");
+         iso8859 += static_cast<char>(iso_char);
+         }
+      else
+         throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used");
+      }
+   return iso8859;
+   }
+
+/*************************************************
+* Convert from ISO 8859-1 to UTF-8               *
+*************************************************/
+std::string latin1_to_utf8(const std::string& iso8859)
+   {
+   std::string encoded;
+   for(u32bit pos = 0; pos < iso8859.size(); ++pos)
+      {
+      const byte ch = static_cast<byte>(iso8859[pos]);
+      if(ch >= 0x80)
+         {
+         // Two bytes: 110000xx holds the top two bits, 10xxxxxx the low six
+         encoded += static_cast<char>(0xC0 | (ch >> 6));
+         encoded += static_cast<char>(0x80 | (ch & 0x3F));
+         }
+      else
+         encoded += static_cast<char>(ch); // 7-bit values pass through
+      }
+   return encoded;
+   }
+
+}
+
+/*************************************************
 * Perform character set transcoding              *
 *************************************************/
 std::string transcode(const std::string& str,
                       Character_Set to, Character_Set from)
    {
-   return global_state().transcode(str, to, from);
+   // LOCAL_CHARSET is handled as an alias for LATIN1_CHARSET
+   const Character_Set src = (from == LOCAL_CHARSET) ? LATIN1_CHARSET : from;
+   const Character_Set dst = (to == LOCAL_CHARSET) ? LATIN1_CHARSET : to;
+
+   if(src == dst)
+      return str;
+   // Only three conversions are implemented; all others are rejected
+   if(dst == LATIN1_CHARSET)
+      {
+      if(src == UTF8_CHARSET) return utf8_to_latin1(str);
+      if(src == UCS2_CHARSET) return ucs2_to_latin1(str);
+      }
+   else if(dst == UTF8_CHARSET && src == LATIN1_CHARSET)
+      return latin1_to_utf8(str);
+
+   throw Invalid_Argument("Unknown transcoding operation from " +
+                          to_string(src) + " to " + to_string(dst));
    }

 /*************************************************
@@ -92,7 +191,8 @@ bool caseless_cmp(char a, char b)
 *************************************************/
 bool caseless_cmp(char a, char b)
    {
-   return (std::tolower((unsigned char)a) == std::tolower((unsigned char)b));
+   const int lower_a = std::tolower(static_cast<unsigned char>(a));
+   return (lower_a == std::tolower(static_cast<unsigned char>(b)));
    }

 }