copypaste

2025-06-02 09:10:50 -04:00
parent 87ccfdf279
commit eb9ae4316f
15 changed files with 3777 additions and 0 deletions
--- a/build_linux64/libamsstring4.linux64.a
+++ b/build_linux64/libamsstring4.linux64.a
--- a/build_linux64/objstore/amsstring4_class.o
+++ b/build_linux64/objstore/amsstring4_class.o
--- a/build_linux64/objstore/amsstring4_portability.o
+++ b/build_linux64/objstore/amsstring4_portability.o
--- a/build_linux64/objstore/amsstring4_tests1.o
+++ b/build_linux64/objstore/amsstring4_tests1.o
--- a/build_linux64/tests
+++ b/build_linux64/tests
--- a/include/amsstring4/amsstring4.hpp
+++ b/include/amsstring4/amsstring4.hpp
@ -4,11 +4,163 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
+#include <string.h>
+#include <stdarg.h>
+#include <vector>
+#include <string>
+#include <limits>
+#include <locale>
+
+#include <amsmathutil25/amsmathutil25.hpp>


 namespace ams
 {

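+//portable stand-in for strcpy_s / strncpy that behaves the same with the
+//linux and microsoft standard C libraries: copies src into dest, stopping
+//after size-1 characters or at src's null terminator, and null-pads the rest.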
+int amsstrcpy_s(char *dest, int size, const char *src);
+
+//wraps the functions sprintf_s and snprintf in a portable manner
+//between linux and microsoft standard C libraries.
+int amssprintf_s(char *s, int n, const char *format, ...);
+
+//Using the C library's sscanf function is more robust
+//than atod or atof. It returns valid numbers for infs and nans
+//Returns nan for any uninterpretable string
+double amsstrtonum(const char *str);
+
+typedef char ams_chartype;
+static const ams_chartype ams_char_cr = (ams_chartype) '\r'; //carriage return
+static const ams_chartype ams_char_lf = (ams_chartype) '\n'; //newline
+static const ams_chartype ams_char_tb = (ams_chartype) '\t'; //tab
+static const ams_chartype ams_char_nt = (ams_chartype) '\0'; //null terminator
+
+class amsstring
+{
+public:
+    //String class: a character buffer (cstring) together with its length.
+    ams_chartype blank; // null terminator returned for accessing index out of bounds
+    ams_chartype *cstring; // the character data; presumably allocated and owned by this object - confirm in amsstring4_class.cpp
+    int length;
+    //length will be set to the length of the cstring not including the null terminating char
+    //(other code in this library writes the terminator at cstring[length])
+    //Basic functions
+    amsstring();
+    ~amsstring();
+    //copy construction / assignment; overloads are declared for both non-const and const sources
+    amsstring(amsstring &other);
+    amsstring& operator=(amsstring &other);
+    amsstring(const amsstring &other);
+    const amsstring& operator=(const amsstring &other);
+    //construction / assignment from a pointer to ams_chartype data
+    amsstring(ams_chartype *other);
+    amsstring(const ams_chartype *other);
+
+    amsstring& operator=(ams_chartype *other);
+    const amsstring& operator=(const ams_chartype *other); //assign string constant to amsstring
+    //const amsstring& operator=(const ams_chartype *other) const; //assign string constant to amsstring
+    //const is a disease! 
+    
+    //
+    //amsstring(int length);
+    //amsstring(int length, const ams_chartype initchar);
+
+    int resize(const int newlen); //NOTE(review): the meaning of the int return value is not visible here - see amsstring4_class.cpp
+    int size() const;
+    //element access ('blank' above documents what an out of bounds index yields)
+    ams_chartype& operator[](const int ind);
+    const ams_chartype& operator[](const int ind) const;
+    ams_chartype& at(const int ind);
+    const ams_chartype& at(const int ind) const;
+
+    void clear();
+    void setall(const ams_chartype val, const int newlen);
+    void shrinktofit();
+
+    //string comparisons
+    bool operator==(const amsstring &other) const;
+    bool operator==(const char *other) const;
+    bool operator!=(const amsstring &other) const;
+    bool operator!=(const char *other) const;
+
+    //string ordering comparison
+    //alphabetizes strings by ASCII character
+    //longer strings compare larger than shorter ones
+    bool operator<(const amsstring &other) const;
+    bool operator>(const amsstring &other) const;
+    bool operator<(const ams_chartype *other) const;
+    bool operator>(const ams_chartype *other) const;
+
+    //Insert, Remove, and Substring
+    void insert(const int ind, const amsstring other); //note: 'other' is taken by value here, so the argument is copied
+    void insert(const int ind, const ams_chartype *other);
+    void remove(const int ind);
+    void remove(const int ind1, const int ind2);
+    void substring(const int ind1, const int ind2, amsstring *sout) const; //presumably writes the result into *sout - confirm
+
+    //Append
+    void append(const amsstring &other);
+    void append(const ams_chartype *other);
+    void append(const ams_chartype other);
+    //concatenation operators; each is declared in a non-const and a const form
+    amsstring operator+(const amsstring &other);
+    const amsstring operator+(const amsstring &other) const;
+    amsstring operator+(const ams_chartype *other);
+    const amsstring operator+(const ams_chartype *other) const;
+    amsstring operator+(const ams_chartype other);
+    const amsstring operator+(const ams_chartype other) const;
+
+    //Find
+    //defaults: indstart=0, casesens=1. NOTE(review): the return is presumably an index with a not-found sentinel - confirm
+    int find(const amsstring findstr, const int indstart=0, const bool casesens=1) const; //note: findstr is taken by value (copied)
+    int find(const ams_chartype *findstr, const int indstart=0, const bool casesens=1) const;
+    int find(const ams_chartype c, const int indstart=0, const bool casesens=1) const;
+
+    //formatted input (NOTE(review): printf-style signature, so presumably this fills the string from formatstring - confirm)
+    int sprintf(int bufflen, const ams_chartype *formatstring, ...);
+
+    void tolower();
+    void toupper();
+
+    bool isvalidnumber();
+    double strtonum(); //NOTE(review): presumably parses cstring the way ams::amsstrtonum does - confirm
+};

+//needs work
+void splitlines(amsstring *s, std::vector<amsstring> *lns);
+void splitlines(amsstring *s, ams::amsarray<amsstring> *lns);
+
+void split(amsstring *s, const ams_chartype delimitchar, std::vector<amsstring> *lns);
+void split(amsstring *s, const ams_chartype delimitchar, ams::amsarray<amsstring> *lns);
+
+void split(amsstring *s, const ams_chartype *delimitstr, std::vector<amsstring> *lns);
+void split(amsstring *s, const ams_chartype *delimitstr, ams::amsarray<amsstring> *lns);
+
+void split(amsstring *s, amsstring *delimitstr, std::vector<amsstring> *lns);
+void split(amsstring *s, amsstring *delimitstr, ams::amsarray<amsstring> *lns);
+
+//splits a string, not counting whitespaces between non-whitespace characters
+void splitwhitespace(amsstring *s, std::vector<amsstring> *lns);
+void splitwhitespace(amsstring *s, ams::amsarray<amsstring> *lns);
+
+//removes all whitespace characters '\t','\r','\n' included
+//to the left and right of the string (but not in the middle)
+void stripwhitespace(amsstring *s);
+
+//completely removes all whitespace entirely
+void stripallwhitespace(amsstring *s);
+
+void freadline(FILE *fp, amsstring *s);
+void freadlines(FILE *fp, std::vector<amsstring> *lines);
+void fwritelines(FILE *fp, amsstring *s);
+void fwritelines(FILE *fp, std::vector<amsstring> *lines);
+void freadtxtfile(FILE *fp, amsstring *s);
+
+}; //end namespace ams
+
+#include <amsstring4/amsstring4_unicode.hpp>
+#include <amsstring4/amsstring4_bintextencoding.hpp>
+#include <amsstring4/amsstring4_tests.hpp>
+
+
 #endif
--- a/include/amsstring4/amsstring4_bintextencoding.hpp
+++ b/include/amsstring4/amsstring4_bintextencoding.hpp
@ -0,0 +1,28 @@
+#ifndef __AMSSTRING4_BINTEXTENCODING_HPP__
+#define __AMSSTRING4_BINTEXTENCODING_HPP__
+
+namespace ams
+{
+
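+//Encodes the bytes as base64 text (alphabet of RFC 4648 section 4), padding
+//the last group with '=' so that the length of str is a multiple of 4.
+//(For decoding, base64decode_strict only processes strings of length
+//divisible by 4, with the expected 0,1,2 padding chars at the end of the
+//string, and no non-coding characters.)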
+void base64encode(ams::amsarray<uint8_t> *bytes, amsstring *str);
+
+
+
+int base64decode(amsstring *str, ams::amsarray<uint8_t> *bytes, bool bstrict);
+
+//decodes, ignoring (as in MIME spec) all characters that are not
+//valid b64 alphabet chars, and all padding until the end of the string
+int base64decode_liberal(amsstring *str, ams::amsarray<uint8_t> *bytes);
+
+int base64decode_strict(amsstring *str, ams::amsarray<uint8_t> *bytes);
+
+void test_base64encode();
+void test_base64encode_fuzztest();
+
+};
+
+#endif
+
--- a/include/amsstring4/amsstring4_tests.hpp
+++ b/include/amsstring4/amsstring4_tests.hpp
@ -0,0 +1,27 @@
+#ifndef __AMSSTRING4_TESTS_HPP__
+#define __AMSSTRING4_TESTS_HPP__
+
+namespace ams
+{
+
+    void amsstring3_basic_string_test1();
+    void amsstring3_sscanf_test1();
+    void amsstring3_basic_string_test2();
+    void amsstring3_memoryleakcheck1();
+    void amsstring3_memoryleakcheck2();
+    void amsstring3_stringtests2();
+    void amsstring3_test_find();
+
+    void amsstring3_test_splitlines();
+    void amsstring3_test_split();
+    void amsstring3_test_strip();
+    void amsstring3_test_freadwrite();
+
+    void amsstring3_test_concatenation_operators();
+    
+    
+
+};
+
+#endif
+
--- a/include/amsstring4/amsstring4_unicode.hpp
+++ b/include/amsstring4/amsstring4_unicode.hpp
@ -0,0 +1,22 @@
+#ifndef __AMSSTRING4_UNICODE_HPP__
+#define __AMSSTRING4_UNICODE_HPP__
+
+namespace ams
+{
+
+int string_to_uccodepoints(const amsstring &str, amsarray<uint32_t> &codepoints);
+int string_to_uccodepoints(const amsstring *str, amsarray<uint32_t> *codepoints);
+
+void uccodepoints_to_string(const amsarray<uint32_t> &codepoints, amsstring &str);
+void uccodepoints_to_string(const amsarray<uint32_t> *codepoints, amsstring *str);
+
+void test_unicode_ascii_int_conv();
+void test_unicode_conv1();
+
+void test_unicode_conv2();
+
+
+};
+
+#endif
+
--- a/src/amsstring4/amsstring4_bintextencoding.cpp
+++ b/src/amsstring4/amsstring4_bintextencoding.cpp
@ -0,0 +1,565 @@
+#include <amsstring4/amsstring4.hpp>
+
+namespace ams
+{
+
+//PGP / GPG text armor, binary encoding scheme: 
+//  
+
+//HTML embedded image file binary encoding scheme:
+//example: 
+// <img alt="Embedded Image" width="158" height="158" 
+//   src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAJ4A..." />
+// ref: https://stackoverflow.com/questions/11474346/how-to-encode-images-within-html
+// Embed other stuff!
+// Data URIs can potentially store any type of data, not just images! Try these examples on for size: (X)HTML CSS Embedding Example
+// <link rel="stylesheet" type="text/css"
+//   href="data:text/css;base64,LyogKioqKiogVGVtcGxhdGUgKioq..." />
+// (X)HTML Javascript Embedding Example
+// <script type="text/javascript"
+//   src="data:text/javascript;base64,dmFyIHNjT2JqMSA9IG5ldyBzY3Jv..."></script>
+
+//I think these are both base-64 encodings of a binary blob
+//ref: https://security.stackexchange.com/questions/142043/how-are-pgp-messages-constructed
+
+//also widely used for email attachments
+
+//Base64: 3 bytes (24-bits) 11111111  22222222 33333333
+//  converted to 4 base-64 digits
+//                          111111 112222 222233 333333
+// subtleties on termination of a string of bytes that doesn't divide by 3
+
+//real reference:
+//  https://datatracker.ietf.org/doc/html/rfc4648#section-4
+
+
+    //              Table 1: The Base 64 Alphabet
+
+    //  Value Encoding  Value Encoding  Value Encoding  Value Encoding
+    //      0 A            17 R            34 i            51 z
+    //      1 B            18 S            35 j            52 0
+    //      2 C            19 T            36 k            53 1
+    //      3 D            20 U            37 l            54 2
+    //      4 E            21 V            38 m            55 3
+    //      5 F            22 W            39 n            56 4
+    //      6 G            23 X            40 o            57 5
+    //      7 H            24 Y            41 p            58 6
+    //      8 I            25 Z            42 q            59 7
+    //      9 J            26 a            43 r            60 8
+    //     10 K            27 b            44 s            61 9
+    //     11 L            28 c            45 t            62 +
+    //     12 M            29 d            46 u            63 /
+    //     13 N            30 e            47 v
+    //     14 O            31 f            48 w         (pad) =
+    //     15 P            32 g            49 x
+    //     16 Q            33 h            50 y
+
+
+//ord is 0-63, 64 for '='
+//Returns the base64 alphabet character for an ordinal: 0-25 -> 'A'-'Z',
+//26-51 -> 'a'-'z', 52-61 -> '0'-'9', 62 -> '+', 63 -> '/', 64 -> '=' (pad).
+//Any other ordinal yields the NULL character '\0'.
+ams_chartype base64_char(int8_t ord)
+{
+    //The three single-character cases first.
+    switch(ord)
+    {
+        case 62: return '+';
+        case 63: return '/';
+        case 64: return '=';
+        default: break;
+    }
+
+    //Everything else is either out of range or inside one of the three
+    //contiguous ASCII runs (offsets 65 = 'A', 97 = 'a', 48 = '0').
+    if(ord<0)
+        return '\0';
+    if(ord<26)
+        return (ams_chartype)(ord+65);
+    if(ord<52)
+        return (ams_chartype)((ord-26)+97);
+    if(ord<62)
+        return (ams_chartype)((ord-52)+48);
+
+    return '\0'; //ord > 64
+}
+
+//Maps a character to its base64 ordinal: 0-63 for the alphabet characters,
+//64 for the padding character '=', and -1 for any other character.
+int8_t base64_ord(ams_chartype ch)
+{
+    //Range tests are done on the unsigned value so that characters >= 0x80
+    //cannot slip into a range through sign extension.
+    const uint8_t ucval = (unsigned char) ch;
+
+    //The result lives in the (signed) return type, so the -1 sentinel no
+    //longer travels through an unsigned temporary and a narrowing conversion.
+    int8_t ret = -1;
+
+    if(ucval>=65 && ucval<91)
+    {
+        ret = (int8_t)(ucval-65);      //'A'-'Z' -> 0-25
+    }
+    else if(ucval>=97 && ucval<123)
+    {
+        ret = (int8_t)(ucval-97+26);   //'a'-'z' -> 26-51
+    }
+    else if(ucval>=48 && ucval<58)
+    {
+        ret = (int8_t)(ucval-48+52);   //'0'-'9' -> 52-61
+    }
+    else if(ch=='+')
+    {
+        ret = 62;
+    }
+    else if(ch=='/')
+    {
+        ret = 63;
+    }
+    else if(ch=='=')
+    {
+        ret = 64;                      //padding
+    }
+
+    return ret;
+}
+
+//Encodes the three bytes bytes[0..2] into the four base64 characters
+//chars[0..3].
+static void b64_encode_3byteblock(uint8_t *bytes, ams_chartype *chars)
+{
+    //11111122 22223333 33444444  ->  111111 222222 333333 444444
+    const uint8_t sextet[4] = {
+        (uint8_t)(bytes[0]>>2),
+        (uint8_t)(((bytes[0] & 0x03)<<4) | (bytes[1]>>4)),
+        (uint8_t)(((bytes[1] & 0x0F)<<2) | (bytes[2]>>6)),
+        (uint8_t)(bytes[2] & 0x3F)
+    };
+
+    for(int k=0;k<4;k++)
+    {
+        chars[k] = base64_char(sextet[k]);
+    }
+}
+
+//Encodes the two trailing bytes bytes[0..1] into three base64 characters
+//followed by a single '=' padding character (chars[0..3]).
+static void b64_encode_2byteblock(uint8_t *bytes, ams_chartype *chars)
+{
+    //11111122 22223333  ->  111111 222222 3333(00) =
+    const uint8_t sextet[3] = {
+        (uint8_t)(bytes[0]>>2),
+        (uint8_t)(((bytes[0] & 0x03)<<4) | (bytes[1]>>4)),
+        (uint8_t)((bytes[1] & 0x0F)<<2)
+    };
+
+    for(int k=0;k<3;k++)
+    {
+        chars[k] = base64_char(sextet[k]);
+    }
+    chars[3] = '=';
+}
+
+//Encodes the single trailing byte bytes[0] into two base64 characters
+//followed by two '=' padding characters (chars[0..3]).
+static void b64_encode_1byteblock(uint8_t *bytes, ams_chartype *chars)
+{
+    //11111122  ->  111111 22(0000) = =
+    const uint8_t high = (uint8_t)(bytes[0]>>2);
+    const uint8_t low  = (uint8_t)((bytes[0] & 0x03)<<4);
+
+    chars[0] = base64_char(high);
+    chars[1] = base64_char(low);
+    chars[3] = chars[2] = '=';
+}
+
+//Decodes the four base64 characters chars[0..3] into up to three bytes.
+//Each '=' found in chars[1..3] reduces the number of bytes written by one
+//(so "xx==" writes 1 byte, "xxx=" writes 2, "xxxx" writes 3).
+//The characters are not validated here. Always returns 1.
+static int decode_4charblock(ams_chartype *chars, uint8_t *bytes)
+{
+    uint8_t sextet[4];
+    int nbytes = 3;
+
+    for(int k=0;k<4;k++)
+    {
+        sextet[k] = base64_ord(chars[k]);
+    }
+
+    //padding contributes zero bits and shortens the output
+    for(int k=3;k>=1;k--)
+    {
+        if(chars[k]=='=')
+        {
+            nbytes--;
+            sextet[k] = 0;
+        }
+    }
+
+    if(nbytes<=0)
+    {
+        return 1;
+    }
+
+    //111111 222222 333333 444444  ->  11111122 22223333 33444444
+    const uint8_t decoded[3] = {
+        (uint8_t)((sextet[0]<<2) | ((sextet[1] & 0x30)>>4)),
+        (uint8_t)(((sextet[1] & 0x0F)<<4) | ((sextet[2] & 0x3C)>>2)),
+        (uint8_t)(((sextet[2] & 0x03)<<6) | (sextet[3] & 0x3F))
+    };
+
+    for(int k=0;k<nbytes;k++)
+    {
+        bytes[k] = decoded[k];
+    }
+
+    return 1;
+}
+
+//Encodes the contents of bytes as base64 text in str.
+//str is resized to 4 characters per started group of 3 input bytes; a final
+//group of 1 or 2 bytes is padded with '='. The terminator is written at
+//str->cstring[str->length].
+void base64encode(ams::amsarray<uint8_t> *bytes, amsstring *str)
+{
+    str->resize((bytes->length % 3 == 0) ? (bytes->length/3)*4
+                                         : (bytes->length/3+1)*4);
+
+    long inpos = 0;   //next unread input byte
+    long outpos = 0;  //next output character; advances by 4 per group
+    for(; inpos<bytes->length; outpos = outpos + 4)
+    {
+        uint8_t *in = &(bytes->data[inpos]);
+        ams_chartype *out = &(str->cstring[outpos]);
+
+        if(bytes->length-inpos>=3)
+        {
+            b64_encode_3byteblock(in,out);
+            inpos = inpos + 3;
+        }
+        else if(bytes->length-inpos==2)
+        {
+            b64_encode_2byteblock(in,out);
+            inpos = inpos + 2;
+        }
+        else
+        {
+            //exactly one byte is left
+            b64_encode_1byteblock(in,out);
+            inpos = inpos + 1;
+        }
+    }
+
+    str->cstring[str->length] = '\0';
+}
+
+//Returns the index of the first '\0' found in str->cstring[0..length-1].
+//When the buffer holds no embedded terminator the whole buffer counts, so
+//str->length is returned (the previous version returned 0 in that case,
+//which made a full buffer indistinguishable from an empty one).
+//NOTE(review): identifiers containing a double underscore are reserved in
+//C++; the name is kept only so any existing caller keeps compiling.
+static long __intl_localstrlen(amsstring *str)
+{
+    long I;
+    for(I=0;I<str->length;I++)
+    {
+        if(str->cstring[I]=='\0')
+        {
+            return I;
+        }
+    }
+    return (str->length>0) ? (long)str->length : 0;
+}
+
+
+//Only processes strings of length divisible by 4, with
+//expected 0,1,2 padding chars at the end of the string,
+//and no non-coding characters.
+//Strict base64 decoder.
+//  str   : text to decode; its length must be divisible by 4, it may end in
+//          at most two '=' padding chars, and must hold no non-coding chars
+//  bytes : receives the decoded bytes; resized to 0 on any failure
+//Returns
+//   1 : success (an empty string decodes to zero bytes)
+//  -1 : invalid length (not a multiple of 4)
+//  -2 : unexpected padding (three trailing '=', or an '=' in the next to
+//       last position that is not followed by a final '=')
+//  -3 : invalid character encountered ('=' before the final padding counts)
+int base64decode_strict(amsstring *str, ams::amsarray<uint8_t> *bytes)
+{
+    const long slen = str->length;
+    long npad = 0;
+    long I,J;
+
+    if(slen == 0)
+    {
+        bytes->resize(0);
+        return 1;
+    }
+
+    if(slen < 0 || slen % 4 != 0)
+    {
+        bytes->resize(0);
+        return -1; //invalid length
+    }
+
+    //Padding is only legal as "=" or "==" at the very end of the string.
+    if(str->cstring[slen-1]=='=') npad++;
+    if(str->cstring[slen-2]=='=') npad++;
+    if(str->cstring[slen-3]=='=' ||
+        (str->cstring[slen-2]=='=' && str->cstring[slen-1]!='='))
+    {
+        bytes->resize(0);
+        return -2; //unexpected number / position of padding chars
+    }
+
+    //Validate every non-padding position before decoding anything, so the
+    //block decoder never runs on characters outside the alphabet. The ordinal
+    //is kept signed: the -1 "not a base64 char" sentinel stays testable.
+    for(I=0;I<slen-npad;I++)
+    {
+        const int8_t v = base64_ord(str->cstring[I]);
+        if(v<0 || v>=64)
+        {
+            bytes->resize(0);
+            return -3; //invalid char encountered
+        }
+    }
+
+    //3 bytes per 4 chars, minus one byte per padding char
+    bytes->resize((slen/4)*3 - npad);
+
+    for(I=0,J=0;I<slen;I=I+4,J=J+3)
+    {
+        decode_4charblock(&(str->cstring[I]),&(bytes->data[J]));
+    }
+
+    return 1;
+}
+
+//decodes, ignoring (as in MIME spec) all characters that are not
+//valid b64 alphabet chars, and all padding until the end of the string
+//Liberal base64 decoder: copies only the alphabet characters (ordinals 0-63)
+//of str, up to the first '\0', into a scratch string, drops everything else
+//(including any '=' found in the input), re-pads the result with '=' to a
+//multiple of 4 and hands it to base64decode_strict.
+//The return value is whatever base64decode_strict returns. A single leftover
+//character is padded with "===", which the strict decoder rejects.
+int base64decode_liberal(amsstring *str, ams::amsarray<uint8_t> *bytes)
+{
+    amsstring filtered;
+    long nkept = 0;
+
+    //room for every input char plus the padding added below
+    filtered.resize(str->length+4);
+
+    //filter chars in str to remove all non-coding chars
+    for(long pos=0;pos<str->length;pos++)
+    {
+        const ams_chartype ch = str->cstring[pos];
+        if(ch=='\0')
+            break;
+
+        const int64_t ord = base64_ord(ch);
+        if(ord<0 || ord>=64)
+            continue;
+
+        filtered.cstring[nkept] = ch;
+        nkept = nkept + 1;
+    }
+
+    //complete the last group of four with padding chars
+    while(nkept%4 != 0)
+    {
+        filtered.cstring[nkept] = '=';
+        nkept++;
+    }
+    filtered.resize(nkept);
+    filtered.cstring[filtered.length] = '\0';
+
+    filtered.shrinktofit();
+
+    return base64decode_strict(&filtered,bytes);
+}
+
+//Test helper: copies the characters of s into b as unsigned bytes.
+//b is resized to s->length (the terminator is not copied).
+static void _intl_convsb(amsstring *s, amsarray<uint8_t> *b)
+{
+    b->resize(s->length);
+
+    long k = 0;
+    while(k<s->length)
+    {
+        b->at(k) = (unsigned char) s->cstring[k];
+        k++;
+    }
+}
+
+//Test helper: copies the bytes of b into s as characters.
+//s is resized to b->length and terminated at s->cstring[s->length].
+static void _intl_convbs(amsarray<uint8_t> *b,amsstring *s)
+{
+    s->resize(b->length);
+
+    long k = 0;
+    while(k<b->length)
+    {
+        s->cstring[k] = b->data[k];
+        k++;
+    }
+
+    s->cstring[s->length] = '\0';
+}
+
+//Prints, for every ordinal from -5 to 69, the ordinal, its base64 character,
+//the ordinal recovered from that character, and the character recovered
+//from that ordinal, so the two mappings can be compared by eye.
+void test_base64encode1()
+{
+    for(int ord=-5;ord<70;ord++)
+    {
+        const int8_t o1 = ord;
+        const ams_chartype c1 = base64_char(o1);
+        const int8_t o2 = base64_ord(c1);
+        const ams_chartype c2 = base64_char(o2);
+
+        printf("%d %d %c %d %c\n",(int)ord,(int)o1,c1,(int)o2,c2);
+    }
+}
+
+//segmentation faults in decode_liberal
+
+//Prints three encode/decode round trips (inputs of 11, 10 and 9 characters,
+//i.e. one of each padding length). The third encoded string gets "\n", "\t"
+//and "}" inserted before it is decoded. Decoding uses the strict decoder.
+void test_base64encode()
+{
+    printf("Tests of base64 encoding/deconding.\n");
+
+    amsstring plain,encoded,decoded;
+    amsarray<uint8_t> rawbytes,decodedbytes;
+    const bool bstrict = 1;
+
+    //One scenario: encode text, optionally damage the encoded string,
+    //decode it again and print all three strings.
+    auto roundtrip = [&](const ams_chartype *text, bool corrupt)
+    {
+        plain = text;
+        _intl_convsb(&plain,&rawbytes);
+        base64encode(&rawbytes,&encoded);
+        if(corrupt)
+        {
+            encoded.insert(3,"\n");
+            encoded.insert(5,"\t");
+            encoded.insert(7,"}");
+        }
+        const int ret = base64decode(&encoded,&decodedbytes,bstrict);
+        _intl_convbs(&decodedbytes,&decoded);
+
+        printf("Original: '%s'\n",plain.cstring);
+        printf("Encoded:  '%s'\n",encoded.cstring);
+        printf("Decoded:  '%s', ret=%d\n",decoded.cstring,ret);
+    };
+
+    roundtrip("light work.",false);
+    roundtrip("light work",false);
+    roundtrip("light wor",true);
+}
+
+//Round-trip test over 100 byte arrays of length 0..99 filled from randd():
+//encode, strict-decode, encode again. A trial passes when both the decoded
+//bytes and the re-encoded text match the originals. Prints the totals.
+void test_base64encode_fuzztest()
+{
+    ams::amsarray<uint8_t> original,roundtripped;
+    amsstring encoded,reencoded;
+
+    int passed = 0;
+    int failed = 0;
+
+    for(long trial=0;trial<100;trial++)
+    {
+        printf("Test %d\n",(int)trial);
+
+        original.resize(trial);
+        for(long k=0;k<original.size();k++)
+        {
+            original.data[k] = randd()*255;
+        }
+
+        base64encode(&original,&encoded);
+        base64decode(&encoded,&roundtripped,1);
+        base64encode(&roundtripped,&reencoded);
+
+        const bool same = (original==roundtripped && encoded==reencoded);
+        if(same)
+            passed++;
+        else
+            failed++;
+    }
+
+    printf("passed: %d, failed %d\n",passed,failed);
+}
+
+//Decodes base64 text from str into bytes.
+//bstrict selects base64decode_strict when true and base64decode_liberal
+//otherwise; the selected decoder's return value is passed through.
+int base64decode(amsstring *str, ams::amsarray<uint8_t> *bytes, bool bstrict=0)
+{
+    return bstrict ? base64decode_strict(str,bytes)
+                   : base64decode_liberal(str,bytes);
+}
+
+
+};
--- a/src/amsstring4/amsstring4_class.cpp
+++ b/src/amsstring4/amsstring4_class.cpp
--- a/src/amsstring4/amsstring4_portability.cpp
+++ b/src/amsstring4/amsstring4_portability.cpp
@ -0,0 +1,177 @@
+#include <amsstring4/amsstring4.hpp>
+
+namespace ams
+{
+    
+//snprintf, vsnprintf should now be part of the C++ standard library
+//as of C++11, so I don't think I need quite as elaborate a compatibility
+//shim as in the previous library.
+//It *should* compile with MinGW and Visual Studio.
+
+
+//Hand-written bounded string copy (no dependence on the platform's
+//strncpy / strcpy_s implementations).
+//
+//Copies the string src to dest. It stops when either size-1 characters have
+//been copied to dest, or a null terminator has been encountered in src.
+//src must be a NULL terminated string, or have at least size-1 readable
+//characters. All remaining positions in dest, up to size, are filled with
+//null terminators, so dest is always terminated.
+//
+//  dest : destination buffer
+//  size : size of the dest buffer in characters, must be > 0
+//  src  : string to copy
+//
+//Returns the number of characters copied, excluding the null terminator
+//(size-1 when src had to be truncated), or an error code:
+//  -1 : src was NULL
+//  -2 : dest was NULL
+//  -3 : size was not positive (dest is left untouched)
+int amsstrcpy_s(char *dest, int size, const char *src)
+{
+  if(dest==NULL)
+  {
+    return -2;
+  }
+
+  if(src==NULL)
+  {
+    return -1;
+  }
+
+  //With size<=0 there is no room even for the terminator; writing
+  //dest[size-1] would land in front of the buffer.
+  if(size<=0)
+  {
+    return -3;
+  }
+
+  int ncopied = 0;
+  while(ncopied<size-1 && src[ncopied]!='\0')
+  {
+    dest[ncopied] = src[ncopied];
+    ncopied++;
+  }
+
+  //terminate and pad the remainder of the buffer
+  for(int I=ncopied;I<size;I++)
+  {
+    dest[I] = '\0';
+  }
+
+  return ncopied;
+}
+
+
+//wrapper for strcpy_s and strncpy which should be portable between gnu and microsoft C libraries
+//strcpy_s
+//strncpy
+// int amsstrcpy_s(char *dest, int size, const char *src)
+// {
+//   int ret = 0;
+//   if(dest!=NULL)
+//   {
+//     if(src!=NULL)
+//     {
+//         #if defined(LINUX) || defined(linux) || defined(__linux__) || defined(__GNUC__)
+//             //use strncpy
+//             strncpy(dest,src,size);
+//             ret = 0;
+//             if(size>0)
+//             {
+//               dest[size-1] = '\0';
+//             }
+//         #elif defined(__MINGW32__) || defined(__MINGW64__) || defined(_WIN32)
+//             //use strcpy_s
+//             //ret = (int)strcpy_s(dest,size,src);
+//             strcpy_s(dest,size,src);
+//             ret = 0;
+//             if(size>0)
+//             {
+//               dest[size-1] = '\0';
+//             }
+//         #else
+//             #pragma message("amsstrcpy_s: Unsupported architecture - neither linux nor mingw64 nor msvc")
+//         #endif
+//     }
+//     else
+//     {
+//         if(size>0)
+//         {
+//             dest[0] = '\0';
+//             ret = -2; //src was NULL
+//         }
+//     }
+//   }
+//   else
+//   {
+//       ret = -1; //dest was a null pointer 
+//   }
+  
+//   return ret;
+// }
+
+
+//Portable bounded formatted print: vsnprintf with GNU-compatible compilers,
+//vsprintf_s with the microsoft C library.
+//
+//  s      : destination buffer
+//  n      : size of the destination buffer in characters, must be > 0
+//  format : printf-style format string, followed by its arguments
+//
+//Returns the value reported by the underlying library call. Returns 0
+//without touching s when s or format is NULL or n is not positive (with
+//n<=0 there is no valid s[n-1] to terminate, and a negative n would turn
+//into a huge unsigned buffer size).
+int amssprintf_s(char *s, int n, const char *format, ...)
+{
+  int ret = 0;
+
+  if(s==NULL || format==NULL || n<=0)
+  {
+    return ret;
+  }
+
+  va_list args;
+  va_start(args, format);
+
+  #if defined(LINUX) || defined(linux) || defined(__linux__) || defined(__GNUC__)
+      //use snprintf
+      ret = (int)vsnprintf(s,(size_t)n,format,args);
+      s[n-1] = '\0';
+
+  #elif defined(__MINGW32__) || defined(__MINGW64__) || defined(_WIN32)
+      //use sprintf_s
+      ret = (int)vsprintf_s(s,(size_t)n,format,args);
+  #else
+      #pragma message("amssprintf_s: Unsupported architecture - neither linux nor mingw64 nor msvc")
+  #endif
+
+  va_end(args);
+  return ret;
+}
+
+//Converts the leading number in str to a double using the C library's
+//sscanf ("%lf"), which also reads the infs and nans that the library accepts.
+//Returns a quiet NaN when str is NULL or when no number could be read
+//(sscanf converting nothing, or reporting EOF for an empty string).
+double amsstrtonum(const char *str)
+{
+  double ret = std::numeric_limits<double>::quiet_NaN();
+
+  if(str==NULL)
+  {
+    return ret;
+  }
+
+  //sscanf reports failure through its return value and never throws, so
+  //only a result of exactly one converted field counts as success.
+  double parsed = 0.0;
+  if(sscanf(str,"%lf",&parsed)==1)
+  {
+    ret = parsed;
+  }
+
+  return ret;
+}
+
+
+
+};
--- a/src/amsstring4/amsstring4_tests1.cpp
+++ b/src/amsstring4/amsstring4_tests1.cpp
@ -0,0 +1,475 @@
+#include <amsstring4/amsstring4.hpp>
+
+namespace ams
+{
+
+    //Prints, for every integer in [-127,255], the character and two-digit hex
+    //value obtained by casting it to unsigned char, then prints one line with
+    //two embedded carriage-return characters.
+    void amsstring3_basic_string_test1()
+    {
+        printf("Basic string tests1.\n");
+
+        for(int code=-127; code<256; code++)
+        {
+            const unsigned char byte = (unsigned char) code;
+            //printf("I=%d  %d %c %02x %02x %02x\n",I,(int)c,c,c,(unsigned char)c,I);
+            printf("I=%d, %c, %02x\n",code,byte,byte);
+        }
+
+        const unsigned char cr = (unsigned char) '\r';
+        printf("\nLF: %c After LF %c After LF2 \n",cr,cr);
+    }
+    
+    //Exercises amssprintf_s together with amsstrtonum: each input is written
+    //into a local buffer, parsed back as a number, and the buffer and parsed
+    //value are printed. The last three cases cover a too-small size, a NULL
+    //format string and a NULL destination.
+    void amsstring3_sscanf_test1()
+    {
+        char buf[500];
+        double value;
+
+        //inputs copied through the full-size buffer before parsing
+        const char *inputs[] =
+        {
+            "-3",
+            "   -3  ",
+            "\t\t-3\t\n  ",
+            "   +3E+1  ",
+            "2,3,4",
+            "inf",
+            "-inf",
+            "nan",
+            "1.0*4E3"
+        };
+        const int ninputs = (int)(sizeof(inputs)/sizeof(inputs[0]));
+
+        for(int k=0; k<ninputs; k++)
+        {
+            amssprintf_s(buf,500,inputs[k]);
+            value = amsstrtonum(buf);
+            printf("String %s reads as %1.4g\n",buf,value);
+        }
+
+        //size of 2: room for one character plus the terminator
+        amssprintf_s(buf,2,"2,3,4");
+        value = amsstrtonum(buf);
+        printf("String %s reads as %1.4g\n",buf,value);
+
+        //NULL format string
+        amssprintf_s(buf,2,NULL);
+        value = amsstrtonum(buf);
+        printf("String %s reads as %1.4g\n",buf,value);
+
+        //NULL destination: amssprintf_s does not touch buf in this case
+        amssprintf_s(NULL,2,"100");
+        value = amsstrtonum(buf);
+        printf("String %s reads as %1.4g\n",buf,value);
+    }
+    
+    //Walks through amsstring assignment, indexed reads and writes from five
+    //below the start to five past the end, equality before and after
+    //resize/shrinktofit, and the > and < operators, printing every result.
+    void amsstring3_basic_string_test2()
+    {
+        amsstring s1,s2;
+        //const amsstring s3; //don't do this - just accept that strings must be mutable
+
+        s1="Hello world";
+        s2 = s1;
+        printf("%d %c\n",(ams_chartype) '\0', (ams_chartype) '\0');
+        printf("s1: '%s', s2: '%s'\n",s1.cstring,s2.cstring);
+
+        //indexed reads of s2, including out-of-range indices
+        for(int idx=-5; idx<s2.size()+5; idx++)
+        {
+            printf("s2[%d]: %d, %c \n",idx,s2[idx],s2[idx]);
+        }
+
+        //indexed writes to s1 over the same index range
+        for(int idx=-5; idx<s2.size()+5; idx++)
+        {
+            s1[idx] = 'a';
+        }
+        printf("s1 = %s\n",s1.cstring);
+
+        //read s1 back after the writes
+        for(int idx=-5; idx<s2.size()+5; idx++)
+        {
+            printf("s1[%d]: %d, %c \n",idx,s1[idx],s1[idx]);
+        }
+
+        //equality: different contents, then identical contents
+        printf("s1:'%s'==s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1==s2);
+        s1 = "Hello";
+        s2 = "Hello";
+        printf("s1:'%s'==s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1==s2);
+
+        //equality after growing s2 and writing past its original text
+        s2.resize(15);
+        s2[10] = 'b';
+        //s2[5] = 'b';
+        //s2[6] = 'b';
+        printf("s1.size()=%d, s2.size()=%d\n",s1.size(),s2.size());
+        printf("s1:'%s'==s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1==s2);
+
+        //equality after shrinking s2 again
+        s2[10] = 'a';
+        s2.shrinktofit();
+        printf("s1.size()=%d, s2.size()=%d\n",s1.size(),s2.size());
+        printf("s1:'%s'==s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1==s2);
+
+        //ordering operators
+        s1 = "hello";
+        s2 = "Hello";
+        printf("s1:'%s'>s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1>s2);
+        s1 = "hello";
+        s2 = "Hello";
+        printf("s1:'%s'<s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1<s2);
+        s1 = "Hello";
+        s2 = "Hello";
+        printf("s1:'%s'>s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1>s2);
+
+        //s3 = "Hello constant world.";
+        //printf("s3= '%s'\n",s3.cstring);
+    }
+    
+    //Stress test intended to be watched with a memory monitor: repeatedly
+    //takes a substring of a string into itself, then cycles three strings
+    //through assignment and resize to 10,000,000 characters.
+    void amsstring3_memoryleakcheck1()
+    {
+        const int bigsize = 10000000;
+        amsstring q1,q2,q3;
+
+        q1.sprintf(1000,"%1.100g,",ams::pi);
+        printf("q1='%s'\n",q1.cstring);
+        printf("q1.size()=%d\n",q1.size());
+        q2 = q1;
+
+        //substring with the source string also being the destination
+        for(int pass=0; pass<100; pass++)
+        {
+            q1.substring(0,q1.length-1,&q1);
+            printf("q1 = substr; q1='%s'\n",q1.cstring);
+        }
+
+        q1 = q2;
+        q1.resize(bigsize);
+
+        //copy-and-resize round robin between the three strings
+        for(int pass=0; pass<20; pass++)
+        {
+            printf("resize test %d\n",(int)pass);
+            q2 = q1;
+            q2.resize(bigsize);
+            q3 = q2;
+            q3.resize(bigsize);
+            q1 = q3;
+            q1.resize(bigsize);
+        }
+    }
+    
+    //Calls amsstring::sprintf 100 times with a size argument of 4 and a
+    //format that produces far more text than that, then prints the result.
+    void amsstring3_memoryleakcheck2()
+    {
+        amsstring q1;
+        int remaining = 100;
+
+        while(remaining>0)
+        {
+            q1.sprintf(4,"%1.100g",ams::pi);
+            remaining--;
+        }
+
+        printf("q1=%s\n",q1.cstring);
+    }
+    
+    //Prints the results of amsstring insert, remove (one- and two-argument
+    //forms), tolower/toupper, append, substring, isvalidnumber and strtonum
+    //over a range of in-bounds and out-of-bounds arguments.
+    void amsstring3_stringtests2()
+    {
+        amsstring q1,q2;
+        amsarray<amsstring> qarr;
+
+        q1.insert(0,"Hello world");
+        printf("q1='%s'\n",q1.cstring);
+
+        //insert at every position from before the start to past the end
+        for(int pos=-2; pos<15; pos++)
+        {
+            q2 = q1;
+            q2.insert(pos,"<insert>");
+            printf("q2=q1;q2.insert(%d,'<insert'>) = '%s' size=%d\n",pos,q2.cstring,q2.size());
+        }
+
+        //remove with a single index
+        q1 = "Hello world.";
+        for(int pos=-5; pos<15; pos++)
+        {
+            q2 = q1;
+            q2.remove(pos);
+            printf("q2.remove(%d) = '%s'\n",pos,q2.cstring);
+        }
+
+        //remove with two indices
+        for(int pos=-5; pos<15; pos++)
+        {
+            const int last = pos+2;
+            q2 = q1;
+            q2.remove(pos,last);
+            printf("q2.remove(%d,%d) = '%s'\n",pos,last,q2.cstring);
+        }
+
+        //case conversion
+        q1 = "Hello hEllo 1,2,3;";
+        printf("q1='%s'\n",q1.cstring);
+        q1.tolower();
+        printf("q1='%s'\n",q1.cstring);
+        q1.toupper();
+        printf("q1='%s'\n",q1.cstring);
+
+        q1.append("hello more appened stuff...");
+        printf("q1='%s'\n",q1.cstring);
+
+        //substring into itself with a negative start index
+        q1.substring(-5,5,&q1);
+        printf("q1='%s'\n",q1.cstring);
+
+        //number detection and parsing
+        const char *numbertexts[] =
+        {
+            "Hello 1,2,3",
+            "   3.1415   ",
+            "-inf",
+            "nan",
+            "1.1E1,2.2E2",
+            ",1,2,3"
+        };
+        const int ntexts = (int)(sizeof(numbertexts)/sizeof(numbertexts[0]));
+        for(int k=0; k<ntexts; k++)
+        {
+            q1 = numbertexts[k];
+            printf("q1='%s', q1.isvalidnumber() = %d, q1.strtonum=%1.6g\n",q1.cstring,q1.isvalidnumber(),q1.strtonum());
+        }
+
+        // qarr.resize(1000);
+        // for(I=0;I<1000;I++)
+        // {
+        //     qarr[I].sprintf(1000,"%1.500g\n",ams::pi);
+        //     qarr[I].resize(1000000);
+        // }
+        // printf("%d",qarr[0].size());
+
+        //three-wide substring window sliding across the string
+        q1 = "Hello world";
+        for(int pos=-2; pos<15; pos++)
+        {
+            const int last = pos+3;
+            q1.substring(pos,last,&q2);
+            printf("q1[%d:%d] = '%s' size=%d\n",pos,last,q2.cstring,q2.size());
+        }
+    }
+    
+    //For each of three search strings, inserts "<insert>" into
+    //"hello world" at every position from -1 to 12 and prints the resulting
+    //string together with the value returned by find(q3,0,0).
+    void amsstring3_test_find()
+    {
+        amsstring q1,q2,q3;
+        const char *needles[] = { "<inser", "<inSeRt>", "<insert> " };
+        const int nneedles = (int)(sizeof(needles)/sizeof(needles[0]));
+
+        q1 = "hello world";
+
+        for(int n=0; n<nneedles; n++)
+        {
+            q3 = needles[n];
+            //every header after the first is preceded by a blank line
+            if(n==0)
+            {
+                printf("q3='%s'\n",q3.cstring);
+            }
+            else
+            {
+                printf("\nq3='%s'\n",q3.cstring);
+            }
+
+            for(int pos=-1; pos<13; pos++)
+            {
+                q2 = q1;
+                q2.insert(pos,"<insert>");
+                printf("q2=q1;q2.insert(%d,'<insert'>) = '%s' q2.find(q3)=%d\n",pos,q2.cstring,q2.find(q3,0,0));
+            }
+        }
+    }
+    
+    //Splits a string containing a mix of "\n" and "\r\n" line endings with
+    //splitlines and prints each resulting line.
+    void amsstring3_test_splitlines()
+    {
+        amsstring q1;
+        amsarray<amsstring> lns;
+        std::vector<amsstring> lns2;
+
+        q1 = "This is a \n string on \n multiple \r\n lines\n\n with CR\\LFs\n";
+        //alternative inputs:
+        //q1 = "\n\n";
+        //q1 = "";
+        //q1 = "More malformed\r string nonsense\n\r\n\r\r\na";
+
+        printf("q1='%s'\n",q1.cstring);
+        splitlines(&q1,&lns2);
+
+        int lineno = 0;
+        while(lineno<lns2.size())
+        {
+            printf("Line %d: '%s'\n",lineno,lns2[lineno].cstring);
+            lineno++;
+        }
+    }
+    
+    //Runs split over a table of (input, delimiter) pairs and prints the
+    //pieces of each, then does the same for splitwhitespace on one input.
+    //The table covers single-char and multi-char delimiters, a delimiter
+    //longer than the input, an empty input and an empty delimiter.
+    void amsstring3_test_split()
+    {
+        amsstring q1;
+        std::vector<amsstring> strs;
+
+        const char *inputs[] =
+        {
+            "this is a string  to split  ",
+            "A\tbunch of tab\tseparated \tvariables\t",
+            "Delimiter is abcd, a ab abcd qabcqdqabcdq",
+            "abc",
+            "",
+            "A string not to split."
+        };
+        const char *delims[] =
+        {
+            " ",
+            "\t",
+            "abcd",
+            "abcd",
+            "abcd",
+            ""
+        };
+        const int ncases = (int)(sizeof(inputs)/sizeof(inputs[0]));
+
+        for(int k=0; k<ncases; k++)
+        {
+            q1 = inputs[k];
+            printf("string='%s'\n",q1.cstring);
+            split(&q1,delims[k],&strs);
+            for(int piece=0; piece<strs.size(); piece++)
+            {
+                printf("S[%d]: '%s'\n",piece,strs[piece].cstring);
+            }
+        }
+
+        q1 = "A  string to  split\tby\t \twhitespace";
+        printf("string='%s'\n",q1.cstring);
+        splitwhitespace(&q1,&strs);
+        for(int piece=0; piece<strs.size(); piece++)
+        {
+            printf("S[%d]: '%s'\n",piece,strs[piece].cstring);
+        }
+    }
+    
+    //Prints strings before and after stripwhitespace and
+    //stripallwhitespace, and calls stripwhitespace with a NULL pointer.
+    void amsstring3_test_strip()
+    {
+        amsstring q1;
+        std::vector<amsstring> strs;
+        const char *padded = "\t  something = something else\t ";
+
+        //whitespace-only input
+        q1 = "   ";
+        printf("string ws  ='%s'\n",q1.cstring);
+        stripwhitespace(&q1);
+        printf("string nows='%s'\n",q1.cstring);
+
+        //text surrounded by tabs and spaces
+        q1 = padded;
+        printf("string ws  ='%s'\n",q1.cstring);
+        stripwhitespace(&q1);
+        printf("string nows='%s'\n",q1.cstring);
+
+        //NULL argument
+        stripwhitespace(NULL);
+
+        //same text through stripallwhitespace
+        q1 = padded;
+        printf("string ws   ='%s'\n",q1.cstring);
+        stripallwhitespace(&q1);
+        printf("string allws='%s'\n",q1.cstring);
+    }
+    
+    //Reads ../ref/0p375_hexbolt.scad line by line with freadline, rewinds,
+    //reads it again in one call with freadlines, and writes the lines to
+    //../ref/testrewrite.scad with fwritelines. Each line read is printed.
+    //Prints a message and returns early if either file cannot be opened.
+    void amsstring3_test_freadwrite()
+    {
+        FILE *fp = NULL;
+        FILE *fp2 = NULL;
+        int I;
+        amsstring q;
+        std::vector<amsstring> q2;
+
+        fp = fopen("../ref/0p375_hexbolt.scad","r");
+        if(fp==NULL)
+        {
+            //feof/fseek/fclose on a NULL stream would crash
+            printf("amsstring3_test_freadwrite: could not open '../ref/0p375_hexbolt.scad' for reading\n");
+            return;
+        }
+
+        fp2 = fopen("../ref/testrewrite.scad","w+");
+        if(fp2==NULL)
+        {
+            printf("amsstring3_test_freadwrite: could not open '../ref/testrewrite.scad' for writing\n");
+            fclose(fp);
+            return;
+        }
+
+        I = 0;
+        while(!feof(fp))
+        {
+            freadline(fp,&q);
+            printf("Line %d: '%s'\n",I,q.cstring);
+            I = I + 1;
+        }
+
+        //rewind to the start of the file: fseek takes (stream, offset, origin)
+        fseek(fp,0,SEEK_SET);
+
+        freadlines(fp,&q2);
+        for(I=0;I<(int)q2.size();I++)
+        {
+            printf("Line %d: '%s'\n",I,q2[I].cstring);
+        }
+
+        //q = "This is a test file\nto write\n\thello\n\n";
+        //fwritelines(fp2,&q);
+
+        fwritelines(fp2,&q2);
+
+        fclose(fp);
+        fclose(fp2);
+        return;
+    }
+    
+    //Builds strings with operator+ (amsstring + literal and
+    //amsstring + amsstring) and prints the doubled string.
+    void amsstring3_test_concatenation_operators()
+    {
+        ams::amsstring a,b,c,d;
+
+        //start from an empty string and grow it with literals
+        a = "";
+        a = a + "hello";
+        a = a + " world\n";
+
+        //concatenate amsstring operands
+        b = a + a;
+        c = b + a;
+
+        printf("%s\n",b.cstring);
+    }
+
+};
--- a/src/amsstring4/amsstring4_unicode.cpp
+++ b/src/amsstring4/amsstring4_unicode.cpp
@ -0,0 +1,638 @@
+#include <amsstring4/amsstring4.hpp>
+
+namespace ams
+{
+
+
+    //UC codepoints
+    //0x00 to 0x10FFFF (~24 bits, with the remainder being escape sequences and the like)
+
+    //different processors order bytes differently (endianness)
+
+    //UTF-8
+    // 0x00 - 0x7F: 1 byte
+    // 0x00: U+0000 - only when representing the null character
+
+    //21 bit values (continuation bytes always start with the bits 10)
+    // 0b0xxxxxxx                               0x00000000  0x0000007F
+    // 0b110xxxxx 10xxxxxx                      0x00000080  0x000007FF
+    // 0b1110xxxx 10xxxxxx 10xxxxxx             0x00000800  0x0000FFFF
+    // 0b11110xxx 10xxxxxx 10xxxxxx 10xxxxxx    0x00010000  0x0010FFFF
+
+
+    //Prints the 32 bits of q to stdout, most significant bit first, as four
+    //groups of eight '0'/'1' characters separated by single spaces.
+    //No trailing space or newline is printed.
+    static void _intl_print_ui32bits(uint32_t q)
+    {
+        int I;
+
+        for(I=32-1;I>=0;I--)
+        {
+            //build the mask from an unsigned 1: shifting a signed int 1 by 31
+            //moves a bit into the sign position
+            if((q & (((uint32_t)1)<<I))!=0)
+            {
+                printf("1");
+            }
+            else
+            {
+                printf("0");
+            }
+
+            //separator after each complete byte except the last one
+            if(I%8==0 && I!=0)
+            {
+                printf(" ");
+            }
+        }
+        return;
+    }
+    
+    //Prints the 8 bits of q to stdout as '0'/'1' characters, most
+    //significant bit first, with no separator or newline.
+    static void _intl_print_ui8bits(uint8_t q)
+    {
+        //walk a single-bit mask from bit 7 down to bit 0
+        for(unsigned int mask=0x80u; mask!=0u; mask>>=1)
+        {
+            printf("%s", ((q & mask)!=0u) ? "1" : "0");
+        }
+    }
+    
+    //Decodes the UTF-8 bytes of str into Unicode code points.
+    //  str        - source string; decoding stops at the first '\0' byte
+    //  codepoints - receives one entry per decoded character followed by a
+    //               terminating 0 entry (added with codepoints.append)
+    //Returns 1 when every byte belonged to a well-formed sequence, 0 otherwise.
+    //Malformed input is skipped, not replaced: a byte that is not a valid
+    //continuation discards the sequence in progress together with that byte,
+    //a stray continuation byte or an invalid lead byte (0xF8-0xFF) is dropped,
+    //and a sequence cut short by the end of the string is discarded.
+    //NOTE(review): overlong encodings, surrogates and values above 0x10FFFF
+    //are not rejected here.
+    int string_to_uccodepoints(const amsstring &str, amsarray<uint32_t> &codepoints)
+    {
+        long I;
+        unsigned char c0;
+        uint32_t cp0 = 0;
+        uint32_t cpw;
+
+        int escs = 0;    //continuation bytes still expected for the current sequence
+        int correct = 1;
+
+        codepoints.reserve(str.length);
+        for(I=0;I<str.length+1;I++)
+        {
+            c0 = (unsigned char)str.cstring[I];
+            if(c0=='\0')
+            {
+                //end of string, terminate search
+                if(escs>0)
+                {
+                    //the string ended in the middle of a multi-byte sequence
+                    correct = 0;
+                }
+                cp0 = 0;
+                codepoints.append(cp0);
+                break;
+            }
+            else if(escs>0)
+            {
+                //inside a multi-byte sequence: only 0b10xxxxxx is acceptable
+                if((c0 & 0b11000000)==0b10000000)
+                {
+                    cpw = (uint32_t)(c0 & 0b00111111);
+                    cp0 = cp0 + (cpw<<((uint32_t)6*(escs-1)));
+                    escs--;
+                    if(escs==0)
+                    {
+                        //sequence complete, push the character
+                        codepoints.append(cp0);
+                    }
+                }
+                else
+                {
+                    //invalid continuation byte - this is bad UTF-8
+                    //don't append anything
+                    correct = 0;
+                    escs = 0;
+                }
+            }
+            else if((c0 & 0b10000000)==0)
+            {
+                //normal ASCII character
+                cp0 = (uint32_t)c0;
+                codepoints.append(cp0);
+            }
+            else if((c0 & 0b11100000)==0b11000000)
+            {
+                //lead byte of a 2-byte sequence: 5 payload bits
+                escs = 1;
+                cpw = (uint32_t)(c0 & 0b00011111);
+                cp0 = (cpw<<((uint32_t)6));
+            }
+            else if((c0 & 0b11110000)==0b11100000)
+            {
+                //lead byte of a 3-byte sequence: 4 payload bits
+                escs = 2;
+                cpw = (uint32_t)(c0 & 0b00001111);
+                cp0 = (cpw<<((uint32_t)12));
+            }
+            else if((c0 & 0b11111000)==0b11110000)
+            {
+                //lead byte of a 4-byte sequence: 3 payload bits
+                escs = 3;
+                cpw = (uint32_t)(c0 & 0b00000111);
+                cp0 = (cpw<<((uint32_t)18));
+            }
+            else
+            {
+                //continuation byte with no lead byte, or a byte of the form
+                //0b11111xxx which never appears in UTF-8
+                correct = 0;
+            }
+
+        } //for chars in string
+
+        codepoints.shrink_to_fit();
+
+        return correct;
+    }
+    
+    //Pointer-argument form of string_to_uccodepoints.
+    //Decodes the UTF-8 bytes of *str into code points stored in *codepoints
+    //by forwarding to the reference overload, so both forms share one decoder.
+    //Returns 1 when the input was well-formed UTF-8 and 0 otherwise;
+    //also returns 0, without touching anything, when either pointer is NULL.
+    int string_to_uccodepoints(const amsstring *str, amsarray<uint32_t> *codepoints)
+    {
+        if(str==NULL || codepoints==NULL)
+        {
+            return 0;
+        }
+
+        return string_to_uccodepoints(*str,*codepoints);
+    }
+    
+    //Encodes Unicode code points as UTF-8 into str.
+    //  codepoints - values to encode; encoding stops at the first 0 entry
+    //               or at the end of the array, whichever comes first
+    //  str        - receives the encoded bytes; it is resized to the worst
+    //               case (4 bytes per code point) and passed through
+    //               shrinktofit() at the end
+    //Values above 0x10FFFF are not valid code points and are skipped.
+    //NOTE(review): surrogate values 0xD800-0xDFFF are encoded as-is.
+    //
+    //UTF-8 layout (21 bit values)
+    // 0b0xxxxxxx                               0x00000000  0x0000007F
+    // 0b110xxxxx 10xxxxxx                      0x00000080  0x000007FF
+    // 0b1110xxxx 10xxxxxx 10xxxxxx             0x00000800  0x0000FFFF
+    // 0b11110xxx 10xxxxxx 10xxxxxx 10xxxxxx    0x00010000  0x0010FFFF
+    void uccodepoints_to_string(const amsarray<uint32_t> &codepoints, amsstring &str)
+    {
+        long I,J;
+        uint32_t cp,bits1,bits2,bits3,bits4;
+
+        str.resize(codepoints.length*4+1);
+        str.cstring[str.length]='\0'; //guard against last char not being \0
+
+        J = 0;
+        for(I=0;I<codepoints.length;I++)
+        {
+            cp = codepoints[I];
+            if(cp==0)
+            {
+                //terminator entry: stop encoding
+                break;
+            }
+
+            if(cp<=0x0000007F)
+            {
+                //one byte, plain ASCII
+                str.cstring[J] = ((ams_chartype)((unsigned char)cp));
+                J++;
+            }
+            else if(cp<=0x000007FF)
+            {
+                bits1 = (cp & 0b00111111)+0b10000000;
+                bits2 = ((cp>>6) & 0b00011111)+0b11000000;
+
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits2));
+                J++;
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits1));
+                J++;
+            }
+            else if(cp<=0x0000FFFF)
+            {
+                bits1 = (cp & 0b00111111)+0b10000000;
+                bits2 = ((cp>>6) & 0b00111111)+0b10000000;
+                bits3 = ((cp>>12) & 0b00001111)+0b11100000;
+
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits3));
+                J++;
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits2));
+                J++;
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits1));
+                J++;
+            }
+            else if(cp<=0x0010FFFF)
+            {
+                //upper bound is the last Unicode code point; anything larger
+                //does not fit the 3 payload bits of the lead byte correctly
+                bits1 = (cp & 0b00111111)+0b10000000;
+                bits2 = ((cp>>6) & 0b00111111)+0b10000000;
+                bits3 = ((cp>>12) & 0b00111111)+0b10000000;
+                bits4 = ((cp>>18) & 0b00000111)+0b11110000;
+
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits4));
+                J++;
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits3));
+                J++;
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits2));
+                J++;
+                str.cstring[J] = ((ams_chartype)((unsigned char)bits1));
+                J++;
+            }
+
+        }
+
+        //always terminate right after the last encoded byte, even when the
+        //input had no 0 entry; J is at most 4*codepoints.length, which is
+        //inside the resized buffer
+        str.cstring[J] = '\0';
+
+        str.shrinktofit();
+        return;
+    }
+    
+    //Pointer-argument form of uccodepoints_to_string.
+    //Encodes the code points in *codepoints as UTF-8 into *str by forwarding
+    //to the reference overload, so both forms share one encoder.
+    //Does nothing when either pointer is NULL.
+    void uccodepoints_to_string(const amsarray<uint32_t> *codepoints, amsstring *str)
+    {
+        if(codepoints==NULL || str==NULL)
+        {
+            return;
+        }
+
+        uccodepoints_to_string(*codepoints,*str);
+        return;
+    }
+    
+    
+    //For every integer from 0 to 254, prints the value, its char and
+    //unsigned char casts as characters, and both casts converted back to int,
+    //tab separated, one line per value.
+    void test_unicode_ascii_int_conv()
+    {
+        for(int code=0; code<255; code++)
+        {
+            const char asChar = (char)code;
+            const unsigned char asUChar = (unsigned char) asChar;
+            const int fromChar = (int)asChar;
+            const int fromUChar = (int)asUChar;
+
+            printf("%d\t%c\t%c\t%d\t%d\n",code,asChar,asUChar,fromChar,fromUChar);
+        }
+    }
+    
+    //Round-trips a single code point: code point -> string -> code points
+    //-> string. Prints the bit patterns at each stage and then PASS/FAIL for
+    //the string comparison and for the first code point comparison.
+    static void test_unicode_conv1_sub(uint32_t codept)
+    {
+        amsarray<uint32_t> codepts1;
+        amsarray<uint32_t> codepts2;
+        amsstring s1,s2;
+
+        codepts1.resize(1);
+        codepts1[0] = codept;
+
+        //first encode
+        uccodepoints_to_string(codepts1,s1);
+
+        printf("UCC: "); _intl_print_ui32bits(codepts1[0]); printf("\n");
+        printf("STR: ");
+        for(long k=0; k<s1.length; k++)
+        {
+            _intl_print_ui8bits((uint8_t)(unsigned char)s1.cstring[k]);
+            printf(" ");
+        }
+        printf("\n");
+
+        //decode what was just encoded
+        string_to_uccodepoints(s1,codepts2);
+        printf("UCC: "); _intl_print_ui32bits(codepts2[0]); printf("\n");
+
+        //encode the decoded value again
+        uccodepoints_to_string(codepts2,s2);
+        printf("STR: ");
+        for(long k=0; k<s2.length; k++)
+        {
+            _intl_print_ui8bits(s2.cstring[k]);
+            printf(" ");
+        }
+        printf("\n");
+
+        //verdicts
+        if(s1==s2)
+        {
+            printf("STR: PASS\t");
+        }
+        else
+        {
+            printf("STR: FAIL\t");
+        }
+
+        if(codepts1[0]==codepts2[0])
+        {
+            printf("UCC: PASS\n");
+        }
+        else
+        {
+            printf("UCC: FAIL\n");
+        }
+
+        //printf("Can I print the char to terminal?: %s\n",s1.cstring);
+    }
+    
+    //Runs the single code point round-trip test on the largest value of
+    //each UTF-8 length class, plus one value (0x07FFFFFF) beyond the
+    //Unicode range. Two newlines are printed after each case.
+    void test_unicode_conv1()
+    {
+        //Test bounding cases
+        const uint32_t bounds[] =
+        {
+            0x7F,
+            0x07FF,
+            0xFFFF,
+            0x0010FFFF,
+            0x07FFFFFF
+        };
+        const int nbounds = (int)(sizeof(bounds)/sizeof(bounds[0]));
+
+        for(int k=0; k<nbounds; k++)
+        {
+            test_unicode_conv1_sub(bounds[k]);
+            printf("\n\n");
+        }
+    }
+    
+    
+    //Round-trips the code points in *cp: encode to a string, decode that
+    //string, then encode the decoded values again.
+    //Returns 1 when both strings are equal and the decoded code points equal
+    //*cp; otherwise prints "FAIL: " and returns 0.
+    static int test_unicode_conv2_sub(amsarray<uint32_t> *cp)
+    {
+        amsstring s1,s2;
+        amsarray<uint32_t> cp2;
+
+        uccodepoints_to_string(cp,&s1);
+        string_to_uccodepoints(&s1,&cp2);
+        uccodepoints_to_string(&cp2,&s2);
+
+        if(!(s1==s2 && *cp==cp2))
+        {
+            printf("FAIL: \n");
+            return 0;
+        }
+
+        return 1;
+    }
+    
+    //Fills *cp with len values drawn from ams::randi(1,0x0010FFFF) followed
+    //by a single terminating 0 entry, so *cp ends up with len+1 entries.
+    static void test_gen_rand_codepts(int len, amsarray<uint32_t> *cp)
+    {
+        long idx = 0;
+
+        cp->resize(len+1);
+        while(idx<len)
+        {
+            cp->at(idx) = ams::randi(1,0x0010FFFF);
+            idx++;
+        }
+
+        //terminator entry
+        cp->at(len) = 0;
+    }
+    
+    //Randomized round-trip test of the code point <-> UTF-8 conversions.
+    //Runs ntests iterations; each generates len random code points and checks
+    //them with test_unicode_conv2_sub. Prints the first generated sequence,
+    //a progress line every tenth of the run, and a pass/failure summary.
+    void test_unicode_conv2()
+    {
+        long I,J;
+        int pass;
+        amsarray<uint32_t> cp;
+
+        long ntests = 10000;
+        long passes = 0;
+        long failures = 0;
+
+        int len = 30;
+
+        printf("Testing unicode to string conversion.\n");
+        printf("%ld tests of %d random codepoints each.\n",ntests,len);
+
+        for(I=0;I<ntests;I++)
+        {
+            //generate as many code points as the banner above reports
+            //(the count used to be hard-coded to 10 here)
+            test_gen_rand_codepts(len,&cp);
+            if(I==0)
+            {
+                printf("ex cp string:");
+                for(J=0;J<cp.length;J++)
+                {
+                    //code points are unsigned 32 bit values
+                    printf("%u,",(unsigned int)cp[J]);
+                }
+                printf("\n");
+            }
+            pass = test_unicode_conv2_sub(&cp);
+            if(pass==1)
+            {
+                passes++;
+            }
+            else
+            {
+                failures++;
+            }
+
+            if(I%(ntests/10)==0)
+            {
+                printf("Test %ld....\n",I);
+            }
+        }
+
+        printf("%ld tests, %ld passes, %ld failures.\n",ntests,passes,failures);
+
+        return;
+    }
+    
+
+};
--- a/src/main.cpp
+++ b/src/main.cpp
@ -4,5 +4,6 @@ int main(int argc, char* argv[])
 {
    int ret = 0;
    printf("ams string4 library tests.\n");
+    
    return ret;
 }