copypaste

2025-06-02 09:10:50 -04:00
parent 87ccfdf279
commit eb9ae4316f
15 changed files with 3777 additions and 0 deletions
--- a/build_linux64/libamsstring4.linux64.a
+++ b/build_linux64/libamsstring4.linux64.a
--- a/build_linux64/objstore/amsstring4_class.o
+++ b/build_linux64/objstore/amsstring4_class.o
--- a/build_linux64/objstore/amsstring4_portability.o
+++ b/build_linux64/objstore/amsstring4_portability.o
--- a/build_linux64/objstore/amsstring4_tests1.o
+++ b/build_linux64/objstore/amsstring4_tests1.o
--- a/build_linux64/tests
+++ b/build_linux64/tests
--- a/include/amsstring4/amsstring4.hpp
+++ b/include/amsstring4/amsstring4.hpp
@ -4,11 +4,163 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <string.h>
 #include <stdarg.h>
 #include <vector>
 #include <string>
 #include <limits>
 #include <locale>
 #include <amsmathutil25/amsmathutil25.hpp>
 namespace ams
 {
 //Bounded string copy, portable between linux and microsoft C libraries.
 //Copies characters from src into dest (a buffer of `size` chars), stops at
 //the null terminator of src or when the buffer is full, and null-terminates dest.
 //Returns a character count, or a negative code when dest or src is NULL.
 int amsstrcpy_s(char *dest, int size, const char *src);
 //Bounded printf-style formatting into the buffer s of n chars; wraps
 //vsnprintf / vsprintf_s in a portable manner between linux and microsoft
 //standard C libraries.
 int amssprintf_s(char *s, int n, const char *format, ...);
 //Converts text to a double using the C library's sscanf, which is more robust
 //than atof: it returns valid numbers for infs and nans.
 //Returns nan for any uninterpretable string
 double amsstrtonum(const char *str);
 //character type used throughout the library, plus named control characters
 typedef char ams_chartype;
 static const ams_chartype ams_char_cr = (ams_chartype) '\r'; //carriage return
 static const ams_chartype ams_char_lf = (ams_chartype) '\n'; //newline
 static const ams_chartype ams_char_tb = (ams_chartype) '\t'; //tab
 static const ams_chartype ams_char_nt = (ams_chartype) '\0'; //null terminator
 //amsstring: character string class whose buffer and length are public members.
 //Only declarations appear here; the member definitions are in a separate source file.
 class amsstring
 {
 public:
    ams_chartype blank; // null terminator returned for accessing index out of bounds
    ams_chartype *cstring; //character buffer; other code in this library reads and writes it directly
    int length;
    //length will be set to the length of the cstring not including the null terminating char
    //Basic functions
    //construction, destruction, copy and assignment; both const and non-const
    //overloads are declared for amsstring sources and for raw character pointers
    amsstring();
    ~amsstring();
    amsstring(amsstring &other);
    amsstring& operator=(amsstring &other);
    amsstring(const amsstring &other);
    const amsstring& operator=(const amsstring &other);
    amsstring(ams_chartype *other);
    amsstring(const ams_chartype *other);
    amsstring& operator=(ams_chartype *other);
    const amsstring& operator=(const ams_chartype *other); //assign string constant to amsstring
    //const amsstring& operator=(const ams_chartype *other) const; //assign string constant to amsstring
    //(a const-qualified assignment overload was tried above and left disabled)
    //
    //amsstring(int length);
    //amsstring(int length, const ams_chartype initchar);
    //sizing and element access
    int resize(const int newlen);
    int size() const;
    ams_chartype& operator[](const int ind);
    const ams_chartype& operator[](const int ind) const;
    ams_chartype& at(const int ind);
    const ams_chartype& at(const int ind) const;
    void clear();
    void setall(const ams_chartype val, const int newlen);
    void shrinktofit();
    //string comparisons
    bool operator==(const amsstring &other) const;
    bool operator==(const char *other) const;
    bool operator!=(const amsstring &other) const;
    bool operator!=(const char *other) const;
    //string ordering comparison
    //alphabetizes strings by ASCII character
    //longer strings compare larger than shorter ones
    bool operator<(const amsstring &other) const;
    bool operator>(const amsstring &other) const;
    bool operator<(const ams_chartype *other) const;
    bool operator>(const ams_chartype *other) const;
    //Insert, Remove, and Substring
    //NOTE(review): insert takes its amsstring argument by value (copied on each call)
    void insert(const int ind, const amsstring other);
    void insert(const int ind, const ams_chartype *other);
    void remove(const int ind);
    void remove(const int ind1, const int ind2);
    //substring writes its result into *sout rather than returning it
    void substring(const int ind1, const int ind2, amsstring *sout) const;
    //Append
    void append(const amsstring &other);
    void append(const ams_chartype *other);
    void append(const ams_chartype other);
    //concatenation operators return a new string by value
    amsstring operator+(const amsstring &other);
    const amsstring operator+(const amsstring &other) const;
    amsstring operator+(const ams_chartype *other);
    const amsstring operator+(const ams_chartype *other) const;
    amsstring operator+(const ams_chartype other);
    const amsstring operator+(const ams_chartype other) const;
    //Find
    //search starts at indstart (default 0); casesens defaults to 1 (case sensitive)
    //NOTE(review): the meaning of the int return value (presumably an index, with
    //some sentinel for "not found") is defined in the implementation - confirm there
    int find(const amsstring findstr, const int indstart=0, const bool casesens=1) const;
    int find(const ams_chartype *findstr, const int indstart=0, const bool casesens=1) const;
    int find(const ams_chartype c, const int indstart=0, const bool casesens=1) const;
    //formatted input
    //printf-style formatting into this string; bufflen is the caller-supplied buffer length
    int sprintf(int bufflen, const ams_chartype *formatstring, ...);
    //in-place case conversion
    void tolower();
    void toupper();
    //numeric interpretation of the string contents (not declared const)
    bool isvalidnumber();
    double strtonum();
 };
 //Free functions operating on amsstring. All take pointers; results are
 //written to the output argument (*lns, *lines or *s) rather than returned.
 //Each splitting function has one overload filling a std::vector and one
 //filling an ams::amsarray.
 //TODO: marked "needs work" by the author
 //splits s into lines
 void splitlines(amsstring *s, std::vector<amsstring> *lns);
 void splitlines(amsstring *s, ams::amsarray<amsstring> *lns);
 //splits s on a delimiter given as a single char, a C string, or an amsstring
 void split(amsstring *s, const ams_chartype delimitchar, std::vector<amsstring> *lns);
 void split(amsstring *s, const ams_chartype delimitchar, ams::amsarray<amsstring> *lns);
 void split(amsstring *s, const ams_chartype *delimitstr, std::vector<amsstring> *lns);
 void split(amsstring *s, const ams_chartype *delimitstr, ams::amsarray<amsstring> *lns);
 void split(amsstring *s, amsstring *delimitstr, std::vector<amsstring> *lns);
 void split(amsstring *s, amsstring *delimitstr, ams::amsarray<amsstring> *lns);
 //splits a string, not counting whitespaces between non-whitespace characters
 void splitwhitespace(amsstring *s, std::vector<amsstring> *lns);
 void splitwhitespace(amsstring *s, ams::amsarray<amsstring> *lns);
 //removes all whitespace characters '\t','\r','\n' included
 //to the left and right of the string (but not in the middle)
 void stripwhitespace(amsstring *s);
 //completely removes all whitespace entirely
 void stripallwhitespace(amsstring *s);
 //text file helpers working on an already opened FILE handle
 //NOTE(review): line-ending handling is defined in the implementation - confirm there
 void freadline(FILE *fp, amsstring *s);
 void freadlines(FILE *fp, std::vector<amsstring> *lines);
 void fwritelines(FILE *fp, amsstring *s);
 void fwritelines(FILE *fp, std::vector<amsstring> *lines);
 void freadtxtfile(FILE *fp, amsstring *s);
 }; //end namespace ams
 #include <amsstring4/amsstring4_unicode.hpp>
 #include <amsstring4/amsstring4_bintextencoding.hpp>
 #include <amsstring4/amsstring4_tests.hpp>
 #endif
--- a/include/amsstring4/amsstring4_bintextencoding.hpp
+++ b/include/amsstring4/amsstring4_bintextencoding.hpp
@ -0,0 +1,28 @@
 #ifndef __AMSSTRING4_BINTEXTENCODING_HPP__
 #define __AMSSTRING4_BINTEXTENCODING_HPP__
 //Base64 (RFC 4648) conversion between byte arrays and amsstring text.
 namespace ams
 {
 //Encodes the byte array into base64 text; the output is padded with '='
 //so that its length is a multiple of 4.
 void base64encode(ams::amsarray<uint8_t> *bytes, amsstring *str);
 //Dispatches to base64decode_strict (bstrict true) or base64decode_liberal
 //(bstrict false). This declaration carries no default for bstrict, so
 //callers that only see this header must pass it explicitly.
 int base64decode(amsstring *str, ams::amsarray<uint8_t> *bytes, bool bstrict);
 //decodes, ignoring (as in MIME spec) all characters that are not
 //valid b64 alphabet chars, and all padding until the end of the string
 int base64decode_liberal(amsstring *str, ams::amsarray<uint8_t> *bytes);
 //Only processes strings of length divisible by 4, with
 //expected 0,1,2 padding chars at the end of the string,
 //and no non-coding characters.
 //Returns 1 on success, -1 for an invalid length, -2 for unexpected padding,
 //-3 for an invalid character; bytes is emptied on failure.
 int base64decode_strict(amsstring *str, ams::amsarray<uint8_t> *bytes);
 //demonstration / self-test routines that print their results
 void test_base64encode();
 void test_base64encode_fuzztest();
 };
 #endif
--- a/include/amsstring4/amsstring4_tests.hpp
+++ b/include/amsstring4/amsstring4_tests.hpp
@ -0,0 +1,27 @@
 #ifndef __AMSSTRING4_TESTS_HPP__
 #define __AMSSTRING4_TESTS_HPP__
 //Test / demonstration entry points for the string library.
 //NOTE(review): the names still carry the "amsstring3" prefix although this
 //header belongs to amsstring4 - presumably inherited from the previous version.
 namespace ams
 {
    //prints a table of byte values as characters and hex
    void amsstring3_basic_string_test1();
    //exercises amssprintf_s and amsstrtonum on sample numeric strings
    void amsstring3_sscanf_test1();
    void amsstring3_basic_string_test2();
    void amsstring3_memoryleakcheck1();
    void amsstring3_memoryleakcheck2();
    void amsstring3_stringtests2();
    void amsstring3_test_find();
    void amsstring3_test_splitlines();
    void amsstring3_test_split();
    void amsstring3_test_strip();
    void amsstring3_test_freadwrite();
    void amsstring3_test_concatenation_operators();
 };
 #endif
--- a/include/amsstring4/amsstring4_unicode.hpp
+++ b/include/amsstring4/amsstring4_unicode.hpp
@ -0,0 +1,22 @@
 #ifndef __AMSSTRING4_UNICODE_HPP__
 #define __AMSSTRING4_UNICODE_HPP__
 //Conversion between amsstring text and arrays of 32-bit code points.
 //Each conversion is declared twice: once taking references, once taking pointers.
 //NOTE(review): the text encoding (presumably UTF-8) and the meaning of the
 //int return value are defined in the implementation - confirm there.
 namespace ams
 {
 //string -> code points
 int string_to_uccodepoints(const amsstring &str, amsarray<uint32_t> &codepoints);
 int string_to_uccodepoints(const amsstring *str, amsarray<uint32_t> *codepoints);
 //code points -> string
 void uccodepoints_to_string(const amsarray<uint32_t> &codepoints, amsstring &str);
 void uccodepoints_to_string(const amsarray<uint32_t> *codepoints, amsstring *str);
 //test routines
 void test_unicode_ascii_int_conv();
 void test_unicode_conv1();
 void test_unicode_conv2();
 };
 #endif
--- a/src/amsstring4/amsstring4_bintextencoding.cpp
+++ b/src/amsstring4/amsstring4_bintextencoding.cpp
@ -0,0 +1,565 @@
 #include <amsstring4/amsstring4.hpp>
 namespace ams
 {
 //PGP / GPG text armor, binary encoding scheme: 
 //  
 //HTML embedded image file binary encoding scheme:
 //example: 
 // <img alt="Embedded Image" width="158" height="158" 
 //   src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAJ4A..." />
 // ref: https://stackoverflow.com/questions/11474346/how-to-encode-images-within-html
 // Embed other stuff!
 // Data URIs can potentially store any type of data, not just images! Try these examples on for size: (X)HTML CSS Embedding Example
 // <link rel="stylesheet" type="text/css"
 //   href="data:text/css;base64,LyogKioqKiogVGVtcGxhdGUgKioq..." />
 // (X)HTML Javascript Embedding Example
 // <script type="text/javascript"
 //   src="data:text/javascript;base64,dmFyIHNjT2JqMSA9IG5ldyBzY3Jv..."></script>
 //I think these are both base-64 encodings of a binary blob
 //ref: https://security.stackexchange.com/questions/142043/how-are-pgp-messages-constructed
 //also widely used for email attachments
 //Base64: 3 bytes (24-bits) 11111111  22222222 33333333
 //  converted to 4 base-64 digits
 //                          111111 112222 222233 333333
 // subtleties on termination of a string of bytes that doesn't divide by 3
 //real reference:
 //  https://datatracker.ietf.org/doc/html/rfc4648#section-4
    //              Table 1: The Base 64 Alphabet
    //  Value Encoding  Value Encoding  Value Encoding  Value Encoding
    //      0 A            17 R            34 i            51 z
    //      1 B            18 S            35 j            52 0
    //      2 C            19 T            36 k            53 1
    //      3 D            20 U            37 l            54 2
    //      4 E            21 V            38 m            55 3
    //      5 F            22 W            39 n            56 4
    //      6 G            23 X            40 o            57 5
    //      7 H            24 Y            41 p            58 6
    //      8 I            25 Z            42 q            59 7
    //      9 J            26 a            43 r            60 8
    //     10 K            27 b            44 s            61 9
    //     11 L            28 c            45 t            62 +
    //     12 M            29 d            46 u            63 /
    //     13 N            30 e            47 v
    //     14 O            31 f            48 w         (pad) =
    //     15 P            32 g            49 x
    //     16 Q            33 h            50 y
 //Maps a base64 digit value to its alphabet character (RFC 4648, table 1).
 //  ord 0-63 selects a character of the alphabet, ord 64 is the pad char '='.
 //  Any other ord yields the NULL character '\0'.
 ams_chartype base64_char(int8_t ord)
 {
    //alphabet in digit order, with the padding symbol appended at index 64
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "+/"
        "=";
    if(ord<0 || ord>64)
    {
        return '\0'; //out of range ord
    }
    return (ams_chartype) alphabet[ord];
 }
 //Maps a base64 alphabet character to its digit value (inverse of base64_char).
 //  'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61, '+' -> 62, '/' -> 63
 //  the padding character '=' -> 64
 //Returns -1 for any character that is not part of the alphabet.
 //The result is kept in a signed variable throughout, so the -1 sentinel never
 //passes through an unsigned 8-bit value on its way out.
 int8_t base64_ord(ams_chartype ch)
 {
    //compare on the unsigned code so that chars above 127 cannot look negative
    const unsigned char code = (unsigned char) ch;
    int8_t ret = -1;
    if(code>=65 && code<91)
    {
        ret = (int8_t)(code-65);        //upper case letters
    }
    else if(code>=97 && code<123)
    {
        ret = (int8_t)(code-97+26);     //lower case letters
    }
    else if(code>=48 && code<58)
    {
        ret = (int8_t)(code-48+52);     //decimal digits
    }
    else if(ch=='+')
    {
        ret = 62;
    }
    else if(ch=='/')
    {
        ret = 63;
    }
    else if(ch=='=')
    {
        ret = 64;                       //padding
    }
    return ret;
 }
 //Encodes exactly three input bytes as four base64 characters.
 //  bytes:  11111111 22222222 33333333
 //  digits: 111111 112222 222233 333333
 //bytes must point at 3 readable bytes, chars at 4 writable characters.
 static void b64_encode_3byteblock(uint8_t *bytes, ams_chartype *chars)
 {
    const uint8_t d0 = (uint8_t)(bytes[0]>>2);
    const uint8_t d1 = (uint8_t)(((bytes[0] & 0x03)<<4) | (bytes[1]>>4));
    const uint8_t d2 = (uint8_t)(((bytes[1] & 0x0F)<<2) | (bytes[2]>>6));
    const uint8_t d3 = (uint8_t)(bytes[2] & 0x3F);
    chars[0] = base64_char(d0);
    chars[1] = base64_char(d1);
    chars[2] = base64_char(d2);
    chars[3] = base64_char(d3);
 }
 //Encodes a trailing group of two input bytes as three base64 characters
 //followed by one '=' pad. The unused low bits of the third digit are zero.
 //bytes must point at 2 readable bytes, chars at 4 writable characters.
 static void b64_encode_2byteblock(uint8_t *bytes, ams_chartype *chars)
 {
    const uint8_t d0 = (uint8_t)(bytes[0]>>2);
    const uint8_t d1 = (uint8_t)(((bytes[0] & 0x03)<<4) | (bytes[1]>>4));
    const uint8_t d2 = (uint8_t)((bytes[1] & 0x0F)<<2);
    chars[0] = base64_char(d0);
    chars[1] = base64_char(d1);
    chars[2] = base64_char(d2);
    chars[3] = '=';
 }
 //Encodes a trailing single input byte as two base64 characters followed by
 //two '=' pads. The unused low bits of the second digit are zero.
 //bytes must point at 1 readable byte, chars at 4 writable characters.
 static void b64_encode_1byteblock(uint8_t *bytes, ams_chartype *chars)
 {
    const uint8_t d0 = (uint8_t)(bytes[0]>>2);
    const uint8_t d1 = (uint8_t)((bytes[0] & 0x03)<<4);
    chars[0] = base64_char(d0);
    chars[1] = base64_char(d1);
    chars[2] = '=';
    chars[3] = '=';
 }
 //Decodes one group of four base64 characters into up to three bytes.
 //  digits: 111111 222222 333333 444444
 //  bytes:  11111122 22223333 33444444
 //Each '=' found in positions 1..3 reduces the number of bytes written by
 //one and contributes zero bits. No validation is done here: the caller is
 //expected to have checked the characters. Always returns 1.
 static int decode_4charblock(ams_chartype *chars, uint8_t *bytes)
 {
    uint8_t digit[4];
    int nbytes = 3;
    int k;
    for(k=0;k<4;k++)
    {
        digit[k] = base64_ord(chars[k]);
    }
    //padding can only shorten the output; position 0 is never treated as padding
    for(k=3;k>=1;k--)
    {
        if(chars[k]=='=')
        {
            nbytes--;
            digit[k] = 0;
        }
    }
    if(nbytes>0)
    {
        const uint8_t decoded[3] =
        {
            (uint8_t)((digit[0]<<2) + ((digit[1] & 0x30)>>4)),
            (uint8_t)(((digit[1] & 0x0F)<<4) + ((digit[2] & 0x3C)>>2)),
            (uint8_t)(((digit[2] & 0x03)<<6) + (digit[3] & 0x3F))
        };
        for(k=0;k<nbytes;k++)
        {
            bytes[k] = decoded[k];
        }
    }
    return 1;
 }
 //Encodes the contents of *bytes as base64 text in *str.
 //Every started group of three input bytes produces four output characters;
 //an incomplete final group is padded with '='. The string is resized to the
 //exact encoded length and a null terminator is written after the last char.
 void base64encode(ams::amsarray<uint8_t> *bytes, amsstring *str)
 {
    long in = 0;    //read position in bytes->data
    long out = 0;   //write position in str->cstring
    const bool whole_groups = (bytes->length % 3 == 0);
    str->resize(whole_groups ? (bytes->length/3)*4 : (bytes->length/3+1)*4);
    while(in<bytes->length)
    {
        const long remaining = bytes->length-in;
        uint8_t *src = &(bytes->data[in]);
        ams_chartype *dst = &(str->cstring[out]);
        if(remaining>=3)
        {
            b64_encode_3byteblock(src,dst);
            in += 3;
        }
        else if(remaining==2)
        {
            b64_encode_2byteblock(src,dst);
            in += 2;
        }
        else
        {
            //exactly one byte left (the loop condition rules out zero)
            b64_encode_1byteblock(src,dst);
            in += 1;
        }
        out += 4;
    }
    str->cstring[str->length] = '\0';
 }
 //Returns the index of the first null character found within the first
 //str->length characters of the buffer, or 0 when there is none.
 static long __intl_localstrlen(amsstring *str)
 {
    long pos = 0;
    while(pos<str->length && str->cstring[pos]!='\0')
    {
        pos++;
    }
    return (pos<str->length) ? pos : 0;
 }
 //Strict base64 decoder.
 //Only processes strings of length divisible by 4, with
 //expected 0,1,2 padding chars at the end of the string,
 //and no non-coding characters.
 //
 //Returns:
 //   1  success (an empty string decodes to an empty byte array)
 //  -1  the string length is not a multiple of 4
 //  -2  bad padding: three trailing '=' or a '=' followed by a data character
 //  -3  a character outside the base64 alphabet, or '=' before the last two positions
 //On any failure *bytes is resized to 0.
 //
 //The whole string is validated before anything is decoded, so an invalid
 //block is never handed to decode_4charblock.
 int base64decode_strict(amsstring *str, ams::amsarray<uint8_t> *bytes)
 {
    long I,J;
    long len;
    int v; //signed, so the -1 "not in alphabet" result of base64_ord stays negative
    if(str->length == 0)
    {
        bytes->resize(0);
        return 1;
    }
    if(str->length % 4 !=0)
    {
        bytes->resize(0);
        return -1; //invalid length
    }
    const long n = str->length;
    const bool pad_last = (str->cstring[n-1]=='=');
    const bool pad_prev = (str->cstring[n-2]=='=');
    //padding must be a run of at most two '=' ending the string
    if(str->cstring[n-3]=='=' || (pad_prev && !pad_last))
    {
        bytes->resize(0);
        return -2; //unexpected number or placement of padding chars
    }
    for(I=0;I<n;I++)
    {
        v = base64_ord(str->cstring[I]);
        if(v<0 || v>64 || (v==64 && I<n-2))
        {
            bytes->resize(0);
            return -3; //invalid char encountered
        }
    }
    //each 4-char block gives 3 bytes, minus one byte per padding char
    len = (n/4)*3;
    if(pad_last) len--;
    if(pad_prev) len--;
    bytes->resize(len);
    J = 0;
    for(I=0;I<n;I+=4)
    {
        decode_4charblock(&(str->cstring[I]),&(bytes->data[J]));
        J = J + 3;
    }
    return 1;
 }
 //Lenient base64 decoder.
 //Decodes, ignoring (as in MIME spec) all characters that are not
 //valid b64 alphabet chars, and all padding until the end of the string.
 //
 //The coding characters are copied into a scratch string (stopping at the
 //first null character), '=' is appended until the length is a multiple of
 //4, and the result is passed to base64decode_strict, whose return value is
 //returned unchanged. A leftover of one coding character is padded with
 //three '=', which the strict decoder reports as a padding error.
 int base64decode_liberal(amsstring *str, ams::amsarray<uint8_t> *bytes)
 {
    amsstring filtered;
    long src;
    long kept = 0;
    long p;
    //room for every input char plus up to three padding chars
    filtered.resize(str->length+4);
    for(src=0;src<str->length;src++)
    {
        const ams_chartype ch = str->cstring[src];
        if(ch=='\0')
        {
            break;
        }
        const int64_t ord = base64_ord(ch);
        if(ord<0 || ord>=64)
        {
            continue; //non-coding char or padding: dropped
        }
        filtered.cstring[kept] = ch;
        kept = kept + 1;
    }
    const long npad = (4-(kept%4))%4;
    for(p=0;p<npad;p++)
    {
        filtered.cstring[kept] = '=';
        kept = kept + 1;
    }
    filtered.resize(kept);
    filtered.cstring[filtered.length] = '\0';
    filtered.shrinktofit();
    return base64decode_strict(&filtered,bytes);
 }
 //Test helper: copies the characters of *s into the byte array *b,
 //resizing *b to s->length. Each char is converted through unsigned char.
 static void _intl_convsb(amsstring *s, amsarray<uint8_t> *b)
 {
    b->resize(s->length);
    long pos = 0;
    while(pos<s->length)
    {
        b->at(pos) = (unsigned char) s->cstring[pos];
        pos++;
    }
 }
 //Test helper: copies the byte array *b into the string *s, resizing *s to
 //b->length and writing a null terminator after the last character.
 static void _intl_convbs(amsarray<uint8_t> *b,amsstring *s)
 {
    s->resize(b->length);
    long pos = 0;
    while(pos<b->length)
    {
        s->cstring[pos] = b->data[pos];
        pos++;
    }
    s->cstring[s->length] = '\0';
 }
 void test_base64encode1()
 {
    int I;
    int8_t o1,o2;
    ams_chartype c1,c2;
    for(I=-5;I<70;I++)
    {
        o1 = I;
        c1 = base64_char(o1);
        o2 = base64_ord(c1);
        c2 = base64_char(o2);
        printf("%d %d %c %d %c\n",(int)I,(int)o1,c1,(int)o2,c2);
    }
    return;
 }
 //Prints three encode/decode round trips of short texts.
 //NOTE: an earlier remark here reported segmentation faults in
 //base64decode_liberal; this routine only uses the strict decoder (bstrict = 1).
 //The third case inserts three non-coding characters into the encoded text
 //before decoding it.
 void test_base64encode()
 {
    printf("Tests of base64 encoding/deconding.\n");
    amsstring s1,s1e,s2;
    amsarray<uint8_t> b1,b2;
    int ret = 0;
    const bool bstrict = 1;
    //text -> bytes (b1) -> base64 text (s1e)
    auto encode_text = [&](const char *text)
    {
        s1 = text;
        _intl_convsb(&s1,&b1);
        base64encode(&b1,&s1e);
    };
    //base64 text (s1e) -> bytes (b2) -> text (s2), then print all three strings
    auto decode_and_report = [&]()
    {
        ret = base64decode(&s1e,&b2,bstrict);
        _intl_convbs(&b2,&s2);
        printf("Original: '%s'\n",s1.cstring);
        printf("Encoded:  '%s'\n",s1e.cstring);
        printf("Decoded:  '%s', ret=%d\n",s2.cstring,ret);
    };
    encode_text("light work.");
    decode_and_report();
    encode_text("light work");
    decode_and_report();
    encode_text("light wor");
    s1e.insert(3,"\n");
    s1e.insert(5,"\t");
    s1e.insert(7,"}");
    decode_and_report();
 }
 //Round-trip test on random data: for each length 0..99 a byte array is
 //filled with random values, encoded, decoded with the strict decoder and
 //encoded again. A case passes when both the bytes and the encoded text
 //come back unchanged. Prints the test number and the final tally.
 void test_base64encode_fuzztest()
 {
    ams::amsarray<uint8_t> bytes,bytes2;
    amsstring str,str2;
    int passed = 0;
    int failed = 0;
    for(long nbytes=0;nbytes<100;nbytes++)
    {
        printf("Test %d\n",(int)nbytes);
        bytes.resize(nbytes);
        for(long k=0;k<bytes.size();k++)
        {
            bytes.data[k] = randd()*255;
        }
        base64encode(&bytes,&str);
        base64decode(&str,&bytes2,1);
        base64encode(&bytes2,&str2);
        const bool roundtrip_ok = (bytes==bytes2 && str==str2);
        if(roundtrip_ok)
        {
            passed++;
        }
        else
        {
            failed++;
        }
    }
    printf("passed: %d, failed %d\n",passed,failed);
 }
 //Decodes base64 text into bytes, choosing the decoder by bstrict:
 //true selects base64decode_strict, false selects base64decode_liberal.
 //Returns whatever the selected decoder returns.
 //The default bstrict=0 is given on this definition only, so it applies
 //only to calls that appear after this point in this translation unit.
 int base64decode(amsstring *str, ams::amsarray<uint8_t> *bytes, bool bstrict=0)
 {
    if(bstrict==1)
    {
        return base64decode_strict(str,bytes);
    }
    return base64decode_liberal(str,bytes);
 }
 };
--- a/src/amsstring4/amsstring4_class.cpp
+++ b/src/amsstring4/amsstring4_class.cpp
--- a/src/amsstring4/amsstring4_portability.cpp
+++ b/src/amsstring4/amsstring4_portability.cpp
@ -0,0 +1,177 @@
 #include <amsstring4/amsstring4.hpp>
 namespace ams
 {
 //snprintf, vsnprintf should now be part of the C++ standard library
 //as of C++11, so I don't think I need quite as elaborate a compatibility
 //shim as in the previous library.
 //It *should* compile with MinGW and Visual Studio.
 //Bounded string copy that does not depend on a platform specific
 //strncpy / strcpy_s implementation.
 //
 //src must be a NULL terminated string, or have more indices than the size of the destination buffer.
 //
 //This function copies the string src to dest.
 //It stops when either size-1 characters have been copied to
 //dest, or a null terminator has been encountered in src.
 //All remaining positions in dest are padded with null terminators, so
 //dest is always terminated. Size is intended to be the size of the dest buffer.
 //
 //The return value is the number of characters copied, excluding the null
 //terminator (also when the copy was truncated), or an error code:
 //  -1  src is NULL
 //  -2  dest is NULL
 //  -3  size is zero or negative (nothing is written)
 int amsstrcpy_s(char *dest, int size, const char *src)
 {
  int ncopied = 0;
  int I = 0;
  if(dest==nullptr)
  {
    return -2;
  }
  if(src==nullptr)
  {
    return -1;
  }
  if(size<=0)
  {
    //no room even for the terminator; writing dest[size-1] would be out of bounds
    return -3;
  }
  while(ncopied<size-1 && src[ncopied]!='\0')
  {
    dest[ncopied] = src[ncopied];
    ncopied++;
  }
  //terminate and zero the rest of the buffer
  for(I=ncopied;I<size;I++)
  {
    dest[I] = '\0';
  }
  return ncopied;
 }
 //wrapper for strcpy_s and strncpy which should be portable between gnu and microsoft C libraries
 //strcpy_s
 //strncpy
 // int amsstrcpy_s(char *dest, int size, const char *src)
 // {
 //   int ret = 0;
 //   if(dest!=NULL)
 //   {
 //     if(src!=NULL)
 //     {
 //         #if defined(LINUX) || defined(linux) || defined(__linux__) || defined(__GNUC__)
 //             //use strncpy
 //             strncpy(dest,src,size);
 //             ret = 0;
 //             if(size>0)
 //             {
 //               dest[size-1] = '\0';
 //             }
 //         #elif defined(__MINGW32__) || defined(__MINGW64__) || defined(_WIN32)
 //             //use strcpy_s
 //             //ret = (int)strcpy_s(dest,size,src);
 //             strcpy_s(dest,size,src);
 //             ret = 0;
 //             if(size>0)
 //             {
 //               dest[size-1] = '\0';
 //             }
 //         #else
 //             #pragma message("amsstrcpy_s: Unsupported architecture - neither linux nor mingw64 nor msvc")
 //         #endif
 //     }
 //     else
 //     {
 //         if(size>0)
 //         {
 //             dest[0] = '\0';
 //             ret = -2; //src was NULL
 //         }
 //     }
 //   }
 //   else
 //   {
 //       ret = -1; //dest was a null pointer 
 //   }
 //   return ret;
 // }
 //Portable bounded printf into a character buffer.
 //Uses vsnprintf on GNU style toolchains and vsprintf_s on Microsoft ones.
 //
 //  s       destination buffer; when NULL nothing is done and 0 is returned
 //  n       size of the destination buffer in chars, terminator included
 //  format  printf style format string followed by its arguments
 //
 //Returns the value of the underlying formatting call (for vsnprintf: the
 //length the full output would have had), or -1 when n is not positive or
 //format is NULL, in which case nothing is written.
 //On the vsnprintf path the buffer is always null terminated.
 int amssprintf_s(char *s, int n, const char *format, ...)
 {
  int ret = 0;
  va_list args;
  if(s==nullptr)
  {
    return 0;
  }
  if(n<=0 || format==nullptr)
  {
    //a non-positive size would make s[n-1] an out of bounds write
    return -1;
  }
  va_start(args, format);
  #if defined(LINUX) || defined(linux) || defined(__linux__) || defined(__GNUC__)
      //use snprintf
      ret = (int)vsnprintf(s,(size_t)n,format,args);
      s[n-1] = '\0';
  #elif defined(__MINGW32__) || defined(__MINGW64__) || defined(_WIN32)
      //use sprintf_s
      ret = (int)vsprintf_s(s,n,format,args);
  #else
      #pragma message("amssprintf_s: Unsupported architecture - neither linux nor mingw64 nor msvc")
  #endif
  va_end(args);
  return ret;
 }
 //Converts the start of a C string to a double using the C library's
 //sscanf with "%lf", which is more robust than atof: leading whitespace is
 //skipped and infs and nans are read as valid numbers.
 //Returns nan for any uninterpretable string, for an empty string and for
 //a NULL pointer.
 double amsstrtonum(const char *str)
 {
  const double notanumber = std::numeric_limits<double>::quiet_NaN();
  double parsed = notanumber;
  if(str==nullptr)
  {
    return notanumber;
  }
  //sscanf reports the number of converted fields; anything but 1 is a failure
  //(0 for a matching failure, EOF when the input ends before any conversion)
  if(sscanf(str,"%lf",&parsed)!=1)
  {
    return notanumber;
  }
  return parsed;
 }
 };
--- a/src/amsstring4/amsstring4_tests1.cpp
+++ b/src/amsstring4/amsstring4_tests1.cpp
@ -0,0 +1,475 @@
 #include <amsstring4/amsstring4.hpp>
 namespace ams
 {
    //Prints every integer from -127 to 255 after conversion to unsigned char,
    //both as a character and as a two-digit hex value, then prints a line
    //with an embedded '\r' character.
    void amsstring3_basic_string_test1()
    {
        printf("Basic string tests1.\n");
        int value = -127;
        while(value<256)
        {
            const unsigned char as_byte = (unsigned char) value;
            printf("I=%d, %c, %02x\n",value,as_byte,as_byte);
            value++;
        }
        //the character inserted here is '\r', although the message labels it LF
        const unsigned char cr = (unsigned char) '\r';
        printf("\nLF: %c After LF %c After LF2 \n",cr,cr);
    }
    //Round-trips a series of strings through amssprintf_s and amsstrtonum and
    //prints what each one parses to. An earlier version of this test called
    //snprintf / sscanf directly on inputs such as " -123.456E10 ", " -inf ",
    //"3", " #.QUAN0 " and "nan"; it now goes through the portable wrappers.
    void amsstring3_sscanf_test1()
    {
        char buf[500];
        double parsed;
        int k;
        //inputs written into the full 500 byte buffer, in order
        const char *inputs[] =
        {
            "-3",
            "   -3  ",
            "\t\t-3\t\n  ",
            "   +3E+1  ",
            "2,3,4",
            "inf",
            "-inf",
            "nan",
            "1.0*4E3"
        };
        const int ninputs = (int)(sizeof(inputs)/sizeof(inputs[0]));
        for(k=0;k<ninputs;k++)
        {
            amssprintf_s(buf,500,inputs[k]);
            parsed = amsstrtonum(buf);
            printf("String %s reads as %1.4g\n",buf,parsed);
        }

        //destination limited to two bytes
        amssprintf_s(buf,2,"2,3,4");
        parsed = amsstrtonum(buf);
        printf("String %s reads as %1.4g\n",buf,parsed);

        //NULL format string
        amssprintf_s(buf,2,NULL);
        parsed = amsstrtonum(buf);
        printf("String %s reads as %1.4g\n",buf,parsed);

        //NULL destination: buf keeps whatever the previous call left in it
        amssprintf_s(NULL,2,"100");
        parsed = amsstrtonum(buf);
        printf("String %s reads as %1.4g\n",buf,parsed);
    }
    //Exercises amsstring assignment, indexing inside and outside the valid
    //range, resize / shrinktofit and the ==, > and < operators, printing
    //every intermediate result.
    //const amsstring objects are deliberately not tested: strings are
    //expected to be mutable.
    void amsstring3_basic_string_test2()
    {
        amsstring s1,s2;
        int pos;
        //formats shared by the repeated report lines below
        const char *eqfmt = "s1:'%s'==s2:'%s'?:%d\n";
        const char *sizefmt = "s1.size()=%d, s2.size()=%d\n";

        s1="Hello world";
        s2 = s1;
        printf("%d %c\n",(ams_chartype) '\0', (ams_chartype) '\0');
        printf("s1: '%s', s2: '%s'\n",s1.cstring,s2.cstring);

        //read s2 from five positions before the start to five past the end
        pos = -5;
        while(pos<s2.size()+5)
        {
            printf("s2[%d]: %d, %c \n",pos,s2[pos],s2[pos]);
            pos++;
        }

        //write through the same index range on s1 ...
        pos = -5;
        while(pos<s2.size()+5)
        {
            s1[pos] = 'a';
            pos++;
        }
        printf("s1 = %s\n",s1.cstring);

        //... and read it back
        pos = -5;
        while(pos<s2.size()+5)
        {
            printf("s1[%d]: %d, %c \n",pos,s1[pos],s1[pos]);
            pos++;
        }
        printf(eqfmt,s1.cstring,s2.cstring,s1==s2);

        //equality of two strings with identical content
        s1 = "Hello";
        s2 = "Hello";
        printf(eqfmt,s1.cstring,s2.cstring,s1==s2);

        //grow s2 and place a character beyond the original text
        s2.resize(15);
        s2[10] = 'b';
        printf(sizefmt,s1.size(),s2.size());
        printf(eqfmt,s1.cstring,s2.cstring,s1==s2);

        //shrink s2 again and repeat the comparison
        s2[10] = 'a';
        s2.shrinktofit();
        printf(sizefmt,s1.size(),s2.size());
        printf(eqfmt,s1.cstring,s2.cstring,s1==s2);

        //ordering operators
        s1 = "hello";
        s2 = "Hello";
        printf("s1:'%s'>s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1>s2);
        s1 = "hello";
        s2 = "Hello";
        printf("s1:'%s'<s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1<s2);
        s1 = "Hello";
        s2 = "Hello";
        printf("s1:'%s'>s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1>s2);
    }
    //Stress test intended to be watched with a memory monitor: repeatedly
    //takes a substring of a string into itself, then cycles three strings
    //through copy + resize to ten million characters.
    void amsstring3_memoryleakcheck1()
    {
        const int bigsize = 10000000;
        amsstring q1,q2,q3;
        int round;
        int remaining;

        q1.sprintf(1000,"%1.100g,",ams::pi);
        printf("q1='%s'\n",q1.cstring);
        printf("q1.size()=%d\n",q1.size());
        q2 = q1; //keep a copy of the full text

        //100 in-place substring calls, source and destination both q1
        remaining = 100;
        while(remaining>0)
        {
            q1.substring(0,q1.length-1,&q1);
            printf("q1 = substr; q1='%s'\n",q1.cstring);
            remaining--;
        }

        q1 = q2;
        q1.resize(bigsize);
        for(round=0;round<20;round++)
        {
            printf("resize test %d\n",(int)round);
            q2 = q1;
            q2.resize(bigsize);
            q3 = q2;
            q3.resize(bigsize);
            q1 = q3;
            q1.resize(bigsize);
        }
        return;
    }
    //Calls amsstring::sprintf 100 times with a size argument of 4 and a format
    //that expands to far more text, then prints the resulting string.
    void amsstring3_memoryleakcheck2()
    {
        amsstring q1;
        int remaining = 100;
        while(remaining>0)
        {
            q1.sprintf(4,"%1.100g",ams::pi);
            remaining--;
        }
        printf("q1=%s\n",q1.cstring);
    }
    //Prints the results of insert, remove (one and two argument forms),
    //tolower / toupper, append, substring, isvalidnumber and strtonum on
    //amsstring, including index arguments outside the string.
    void amsstring3_stringtests2()
    {
        amsstring q1,q2;
        int pos;
        int k;
        amsarray<amsstring> qarr; //only referenced by a disabled bulk-allocation experiment
        //candidate number strings for isvalidnumber / strtonum, in order
        const char *numtexts[] =
        {
            "Hello 1,2,3",
            "   3.1415   ",
            "-inf",
            "nan",
            "1.1E1,2.2E2",
            ",1,2,3"
        };
        const int nnumtexts = (int)(sizeof(numtexts)/sizeof(numtexts[0]));

        //insert at every position from -2 to 14
        q1.insert(0,"Hello world");
        printf("q1='%s'\n",q1.cstring);
        pos = -2;
        while(pos<15)
        {
            q2 = q1;
            q2.insert(pos,"<insert>");
            printf("q2=q1;q2.insert(%d,'<insert'>) = '%s' size=%d\n",pos,q2.cstring,q2.size());
            pos++;
        }

        //remove with a single index argument
        q1 = "Hello world.";
        pos = -5;
        while(pos<15)
        {
            q2 = q1;
            q2.remove(pos);
            printf("q2.remove(%d) = '%s'\n",pos,q2.cstring);
            pos++;
        }

        //remove with two index arguments
        pos = -5;
        while(pos<15)
        {
            q2 = q1;
            q2.remove(pos,pos+2);
            printf("q2.remove(%d,%d) = '%s'\n",pos,pos+2,q2.cstring);
            pos++;
        }

        //case conversion, append and in-place substring
        q1 = "Hello hEllo 1,2,3;";
        printf("q1='%s'\n",q1.cstring);
        q1.tolower();
        printf("q1='%s'\n",q1.cstring);
        q1.toupper();
        printf("q1='%s'\n",q1.cstring);
        q1.append("hello more appened stuff...");
        printf("q1='%s'\n",q1.cstring);
        q1.substring(-5,5,&q1);
        printf("q1='%s'\n",q1.cstring);

        //number recognition and conversion
        for(k=0;k<nnumtexts;k++)
        {
            q1 = numtexts[k];
            printf("q1='%s', q1.isvalidnumber() = %d, q1.strtonum=%1.6g\n",q1.cstring,q1.isvalidnumber(),q1.strtonum());
        }

        //substring windows of width 3 sliding across and past the string
        q1 = "Hello world";
        pos = -2;
        while(pos<15)
        {
            q1.substring(pos,pos+3,&q2);
            printf("q1[%d:%d] = '%s' size=%d\n",pos,pos+3,q2.cstring,q2.size());
            pos++;
        }
        return;
    }
    //Inserts "<insert>" into "hello world" at each position from -1 to 12 and
    //prints what find() reports for three search strings: a prefix of the
    //inserted text, the same text in mixed case, and the text followed by a
    //space.
    void amsstring3_test_find()
    {
        amsstring q1,q2,q3;
        int pos;
        int which;
        const char *patterns[3] = {"<inser", "<inSeRt>", "<insert> "};

        q1 = "hello world";
        for(which=0;which<3;which++)
        {
            q3 = patterns[which];
            //every header after the first is preceded by a blank line
            printf((which==0) ? "q3='%s'\n" : "\nq3='%s'\n",q3.cstring);
            pos = -1;
            while(pos<13)
            {
                q2 = q1;
                q2.insert(pos,"<insert>");
                printf("q2=q1;q2.insert(%d,'<insert'>) = '%s' q2.find(q3)=%d\n",pos,q2.cstring,q2.find(q3,0,0));
                pos++;
            }
        }
        return;
    }
    void amsstring3_test_splitlines()
    {
        int I;
        amsstring q1;
        amsarray<amsstring> lns;
        std::vector<amsstring> lns2;
        q1 = "This is a \n string on \n multiple \r\n lines\n\n with CR\\LFs\n";
        //q1 = "\n\n";
        //q1 = "";
        //q1 = "More malformed\r string nonsense\n\r\n\r\r\na";
        printf("q1='%s'\n",q1.cstring);
        splitlines(&q1,&lns2);
        for(I=0;I<lns2.size();I++)
        {
            printf("Line %d: '%s'\n",I,lns2[I].cstring);
        }
        return;
    }
    //Runs split() over a list of (text, delimiter) pairs and
    //splitwhitespace() over one more text, printing the pieces each time.
    void amsstring3_test_split()
    {
        amsstring q1;
        std::vector<amsstring> strs;
        int k;
        int which;
        //texts[i] is split on delims[i]
        const char *texts[] =
        {
            "this is a string  to split  ",
            "A\tbunch of tab\tseparated \tvariables\t",
            "Delimiter is abcd, a ab abcd qabcqdqabcdq",
            "abc",
            "",
            "A string not to split."
        };
        const char *delims[] =
        {
            " ",
            "\t",
            "abcd",
            "abcd",
            "abcd",
            ""
        };
        const int ncases = (int)(sizeof(texts)/sizeof(texts[0]));

        for(which=0;which<ncases;which++)
        {
            q1 = texts[which];
            printf("string='%s'\n",q1.cstring);
            split(&q1,delims[which],&strs);
            for(k=0;k<(int)strs.size();k++)
            {
                printf("S[%d]: '%s'\n",k,strs[k].cstring);
            }
        }

        //whitespace splitting on mixed runs of spaces and tabs
        q1 = "A  string to  split\tby\t \twhitespace";
        printf("string='%s'\n",q1.cstring);
        splitwhitespace(&q1,&strs);
        for(k=0;k<(int)strs.size();k++)
        {
            printf("S[%d]: '%s'\n",k,strs[k].cstring);
        }
        return;
    }
    void amsstring3_test_strip()
    {
        amsstring q1;
        std::vector<amsstring> strs;
        int I;
        q1 = "   ";
        printf("string ws  ='%s'\n",q1.cstring);
        stripwhitespace(&q1);
        printf("string nows='%s'\n",q1.cstring);
        q1 = "\t  something = something else\t ";
        printf("string ws  ='%s'\n",q1.cstring);
        stripwhitespace(&q1);
        printf("string nows='%s'\n",q1.cstring);
        stripwhitespace(NULL);
        q1 = "\t  something = something else\t ";
        printf("string ws   ='%s'\n",q1.cstring);
        stripallwhitespace(&q1);
        printf("string allws='%s'\n",q1.cstring);
        return;
    }
    //Reads ../ref/0p375_hexbolt.scad line by line with freadline(), rewinds,
    //reads it again in one call with freadlines(), prints both passes and
    //writes the lines back out to ../ref/testrewrite.scad with fwritelines().
    //Prints a message and returns early if either file cannot be opened.
    void amsstring3_test_freadwrite()
    {
        FILE *fp = NULL;
        FILE *fp2 = NULL;
        int I;
        amsstring q;
        std::vector<amsstring> q2;
        fp = fopen("../ref/0p375_hexbolt.scad","r");
        if(fp==NULL)
        {
            //feof / freadline on a NULL stream would crash
            printf("amsstring3_test_freadwrite: could not open ../ref/0p375_hexbolt.scad for reading\n");
            return;
        }
        fp2 = fopen("../ref/testrewrite.scad","w+");
        if(fp2==NULL)
        {
            printf("amsstring3_test_freadwrite: could not open ../ref/testrewrite.scad for writing\n");
            fclose(fp);
            return;
        }
        I = 0;
        //NOTE(review): feof only turns true after a read has hit end of file,
        //so the last freadline call may report an empty line - confirm against
        //freadline's behavior
        while(!feof(fp))
        {
            freadline(fp,&q);
            printf("Line %d: '%s'\n",I,q.cstring);
            I = I + 1;
        }
        //rewind: fseek takes (stream, offset, origin); the old call had the
        //last two swapped and only worked because SEEK_SET is 0
        fseek(fp,0,SEEK_SET);
        freadlines(fp,&q2);
        for(I=0;I<(int)q2.size();I++)
        {
            printf("Line %d: '%s'\n",I,q2[I].cstring);
        }
        //q = "This is a test file\nto write\n\thello\n\n";
        //fwritelines(fp2,&q);
        fwritelines(fp2,&q2);
        fclose(fp);
        fclose(fp2);
        return;
    }
    void amsstring3_test_concatenation_operators()
    {
        ams::amsstring a,b,c,d;
        a = "";
        a = a + "hello";
        a = a+ " world\n";
        b = a+a;
        c = b+a;
        printf("%s\n",b.cstring);
        return;
    }
 };
--- a/src/amsstring4/amsstring4_unicode.cpp
+++ b/src/amsstring4/amsstring4_unicode.cpp
@ -0,0 +1,638 @@
 #include <amsstring4/amsstring4.hpp>
 namespace ams
 {
    //UC codepoints
    //0x00 to 0x10FFFF (~24 bits, with the remainder being escape sequences and the like)
    //different processors order bytes differently (endianness)
    //UTF-8
    // 0x00 - 0x7F: 1 byte
    // 0x00: U+0000 - only when representing the null character
    //21 bit values
    // 0b0xxxxxxx                               0x00000000  0x0000007F
    // 0b110xxxxx 10xxxxxx                      0x00000080  0x000007FF
    // 0b1110xxxx 10xxxxxx 10xxxxxx             0x00000800  0x0000FFFF
    // 0b11110xxx 10xxxxxx 10xxxxxx 10xxxxxx    0x00010000  0x0010FFFF
    //Prints the 32 bits of q to stdout, most significant bit first, as four
    //groups of eight digits separated by single spaces (no trailing newline).
    static void _intl_print_ui32bits(uint32_t q)
    {
        int I;
        for(I=32-1;I>=0;I--)
        {
            //the mask is built from an unsigned 1: shifting a signed 1 left by
            //31 would move a bit into the sign position
            if((q & (((uint32_t)1)<<I))!=0)
            {
                printf("1");
            }
            else
            {
                printf("0");
            }
            //separator after each completed byte except the last one
            if(I%8==0 && I!=0)
            {
                printf(" ");
            }
        }
        return;
    }
    //Prints the 8 bits of q to stdout as '1' / '0' characters, most
    //significant bit first, with no separator or newline.
    static void _intl_print_ui8bits(uint8_t q)
    {
        int bit = 8;
        //bit takes the values 7,6,...,0 inside the loop body
        while(bit-- > 0)
        {
            printf(((q & 1<<bit)!=0) ? "1" : "0");
        }
        return;
    }
    //Decodes the UTF-8 bytes of str into Unicode code points.
    //  str        - UTF-8 encoded input; decoding stops at the first '\0'
    //  codepoints - receives one value per decoded character, followed by a
    //               terminating 0 code point
    //Returns 1 if every byte was part of a well-formed sequence, 0 otherwise.
    //A sequence is reported as malformed when a continuation byte is missing,
    //when the string ends in the middle of a sequence, or when a byte cannot
    //start a sequence (stray continuation byte, or a lead byte of the form
    //11111xxx). Malformed bytes are dropped from the output, not replaced.
    //NOTE(review): a byte that interrupts a multi-byte sequence is itself
    //discarded even if it would be valid on its own - behavior kept as is.
    int string_to_uccodepoints(const amsstring &str, amsarray<uint32_t> &codepoints)
    {
        long I;
        unsigned char b;
        uint32_t cp0 = 0;
        int escs = 0; //continuation bytes still expected for the current sequence
        int correct = 1;
        codepoints.reserve(str.length);
        for(I=0;I<str.length+1;I++)
        {
            b = (unsigned char)str.cstring[I];
            if(b=='\0')
            {
                //end of string, terminate search
                if(escs>0)
                {
                    //the string stopped in the middle of a multi-byte sequence
                    correct = 0;
                }
                cp0 = 0;
                codepoints.append(cp0);
                break;
            }
            if(escs>0)
            {
                //inside a multi-byte sequence: only 10xxxxxx is acceptable
                if((b & 0b11000000)==0b10000000)
                {
                    escs--;
                    cp0 = cp0 + (((uint32_t)(b & 0b00111111))<<((uint32_t)6*escs));
                    if(escs==0)
                    {
                        //sequence complete, push the character
                        codepoints.append(cp0);
                    }
                }
                else
                {
                    //invalid continuation byte - this is bad UTF-8
                    //abandon the sequence, don't append anything
                    correct = 0;
                    escs = 0;
                }
            }
            else if((b & 0b10000000)==0)
            {
                //normal ASCII character
                cp0 = (uint32_t)b;
                codepoints.append(cp0);
            }
            else if((b & 0b11100000)==0b11000000)
            {
                //lead byte of a 2 byte sequence: 5 payload bits
                escs = 1;
                cp0 = ((uint32_t)(b & 0b00011111))<<((uint32_t)6);
            }
            else if((b & 0b11110000)==0b11100000)
            {
                //lead byte of a 3 byte sequence: 4 payload bits
                escs = 2;
                cp0 = ((uint32_t)(b & 0b00001111))<<((uint32_t)12);
            }
            else if((b & 0b11111000)==0b11110000)
            {
                //lead byte of a 4 byte sequence: 3 payload bits
                escs = 3;
                cp0 = ((uint32_t)(b & 0b00000111))<<((uint32_t)18);
            }
            else
            {
                //a continuation byte with no lead byte before it, or a byte of
                //the form 11111xxx: neither can start a character
                correct = 0;
            }
        } //for chars in string
        codepoints.shrink_to_fit();
        return correct;
    }
    //Pointer-argument overload of string_to_uccodepoints.
    //Forwards to the reference overload so the two decoders cannot drift
    //apart. Returns 0 without doing anything if either pointer is NULL;
    //otherwise returns what the reference overload returns (1 for well-formed
    //input, 0 otherwise).
    int string_to_uccodepoints(const amsstring *str, amsarray<uint32_t> *codepoints)
    {
        if(str==NULL || codepoints==NULL)
        {
            return 0;
        }
        return string_to_uccodepoints(*str,*codepoints);
    }
    //Encodes an array of Unicode code points as a UTF-8 string.
    //  codepoints - values to encode; encoding stops at the first 0 value or
    //               at the end of the array, whichever comes first
    //  str        - receives the encoded bytes, always '\0' terminated
    //Byte layout used (21 bit values, continuation bytes start with 10):
    // 0b0xxxxxxx                               0x00000000  0x0000007F
    // 0b110xxxxx 10xxxxxx                      0x00000080  0x000007FF
    // 0b1110xxxx 10xxxxxx 10xxxxxx             0x00000800  0x0000FFFF
    // 0b11110xxx 10xxxxxx 10xxxxxx 10xxxxxx    0x00010000  0x0010FFFF
    //Values above 0x0010FFFF cannot be represented and are skipped.
    void uccodepoints_to_string(const amsarray<uint32_t> &codepoints, amsstring &str)
    {
        long I,J;
        uint32_t cp;
        //worst case is 4 bytes per code point, plus one for the terminator
        str.resize(codepoints.length*4+1);
        str.cstring[str.length]='\0'; //guard against last char not being \0
        J = 0;
        for(I=0;I<codepoints.length;I++)
        {
            cp = codepoints[I];
            if(cp==0)
            {
                //terminating code point: stop encoding
                break;
            }
            else if(cp<=0x0000007F)
            {
                str.cstring[J] = ((ams_chartype)((unsigned char)cp));
                J++;
            }
            else if(cp<=0x000007FF)
            {
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b11000000 | (cp>>6))));
                J++;
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
                J++;
            }
            else if(cp<=0x0000FFFF)
            {
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b11100000 | (cp>>12))));
                J++;
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>6) & 0b00111111))));
                J++;
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
                J++;
            }
            else if(cp<=0x0010FFFF)
            {
                //upper bound is 0x0010FFFF: anything larger does not fit the
                //3 payload bits of the lead byte
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b11110000 | (cp>>18))));
                J++;
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>12) & 0b00111111))));
                J++;
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>6) & 0b00111111))));
                J++;
                str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
                J++;
            }
            //else: not encodable, skip it
        }
        //terminate right after the last byte written, whether or not the
        //input carried a 0 code point; J is at most 4*length, which is inside
        //the buffer requested above
        str.cstring[J] = '\0';
        str.shrinktofit();
        return;
    }
    //Pointer-argument overload of uccodepoints_to_string.
    //Forwards to the reference overload so the two encoders cannot drift
    //apart. Does nothing if either pointer is NULL.
    void uccodepoints_to_string(const amsarray<uint32_t> *codepoints, amsstring *str)
    {
        if(codepoints==NULL || str==NULL)
        {
            return;
        }
        uccodepoints_to_string(*codepoints,*str);
        return;
    }
    //For each integer from 0 to 254 prints one tab separated row: the
    //integer, the value as char and as unsigned char (both with %c), and the
    //ints obtained by converting the char and the unsigned char back.
    void test_unicode_ascii_int_conv()
    {
        int value;
        for(value=0;value<255;value++)
        {
            const char as_char = (char)value;
            const unsigned char as_uchar = (unsigned char) as_char;
            const int from_char = (int)as_char;
            const int from_uchar = (int)as_uchar;
            printf("%d\t%c\t%c\t%d\t%d\n",value,as_char,as_uchar,from_char,from_uchar);
        }
        return;
    }
    static void test_unicode_conv1_sub(uint32_t codept)
    {
        long I;
        amsarray<uint32_t> codepts1;
        amsarray<uint32_t> codepts2;
        amsstring s1,s2;
        codepts1.resize(1);
        codepts1[0] = codept;
        uccodepoints_to_string(codepts1,s1);
        printf("UCC: "); _intl_print_ui32bits(codepts1[0]); printf("\n");
        printf("STR: ");
        for(I=0;I<s1.length;I++)
        {
            _intl_print_ui8bits((uint8_t)(unsigned char)s1.cstring[I]);
            printf(" ");
        }
        printf("\n");
        string_to_uccodepoints(s1,codepts2);
        printf("UCC: "); _intl_print_ui32bits(codepts2[0]); printf("\n");
        uccodepoints_to_string(codepts2,s2);
        printf("STR: ");
        for(I=0;I<s2.length;I++)
        {
            _intl_print_ui8bits(s2.cstring[I]);
            printf(" ");
        }
        printf("\n");
        if(s1==s2)
        {
            printf("STR: PASS\t");
        }
        else
        {
            printf("STR: FAIL\t");
        }
        if(codepts1[0]==codepts2[0])
        {
            printf("UCC: PASS\n");
        }
        else
        {
            printf("UCC: FAIL\n");
        }
        //printf("Can I print the char to terminal?: %s\n",s1.cstring);
        return;
    }
    //Runs the single code point round-trip on the largest value of each
    //UTF-8 length class (0x7F, 0x07FF, 0xFFFF, 0x0010FFFF) and on 0x07FFFFFF,
    //which lies above the last class. Each report is followed by two newlines.
    void test_unicode_conv1()
    {
        const uint32_t boundaries[] =
        {
            0x7F,
            0x07FF,
            0xFFFF,
            0x0010FFFF,
            0x07FFFFFF
        };
        const int nboundaries = (int)(sizeof(boundaries)/sizeof(boundaries[0]));
        int k;
        for(k=0;k<nboundaries;k++)
        {
            test_unicode_conv1_sub(boundaries[k]);
            printf("\n\n");
        }
        return;
    }
    //Round-trips an array of code points through encode -> decode -> encode
    //using the pointer overloads. Returns 1 if both the two encoded strings
    //and the two code point arrays compare equal; otherwise prints "FAIL: "
    //and returns 0.
    static int test_unicode_conv2_sub(amsarray<uint32_t> *cp)
    {
        amsstring encoded,reencoded;
        amsarray<uint32_t> decoded;

        uccodepoints_to_string(cp,&encoded);
        string_to_uccodepoints(&encoded,&decoded);
        uccodepoints_to_string(&decoded,&reencoded);

        const int pass = (encoded==reencoded && *cp==decoded) ? 1 : 0;
        if(pass==0)
        {
            printf("FAIL: \n");
        }
        return pass;
    }
    //Fills *cp with len values obtained from ams::randi(1,0x0010FFFF),
    //followed by a single 0 entry (len+1 entries in total).
    static void test_gen_rand_codepts(int len, amsarray<uint32_t> *cp)
    {
        long slot = 0;
        cp->resize(len+1);
        while(slot<len)
        {
            cp->at(slot) = ams::randi(1,0x0010FFFF);
            slot++;
        }
        //terminating entry
        cp->at(len) = 0;
        return;
    }
    //Randomized round-trip test: generates ntests arrays of len random code
    //points each, runs every array through test_unicode_conv2_sub and prints
    //the number of passes and failures. The first generated array is printed
    //as an example, and a progress line is printed every ntests/10 tests.
    void test_unicode_conv2()
    {
        long I,J;
        int pass;
        amsarray<uint32_t> cp;
        long ntests = 10000;
        long passes = 0;
        long failures = 0;
        int len = 30;
        printf("Testing unicode to string conversion.\n");
        printf("%ld tests of %d random codepoints each.\n",ntests,len);
        for(I=0;I<ntests;I++)
        {
            //use len here so the banner above matches what is actually tested
            //(the call used to pass a hard-coded 10)
            test_gen_rand_codepts(len,&cp);
            if(I==0)
            {
                printf("ex cp string:");
                for(J=0;J<cp.length;J++)
                {
                    //code points are unsigned 32 bit values
                    printf("%u,",(unsigned int)cp[J]);
                }
                printf("\n");
            }
            pass = test_unicode_conv2_sub(&cp);
            if(pass==1)
            {
                passes++;
            }
            else
            {
                failures++;
            }
            if(I%(ntests/10)==0)
            {
                printf("Test %ld....\n",I);
            }
        }
        printf("%ld tests, %ld passes, %ld failures.\n",ntests,passes,failures);
        return;
    }
 };
--- a/src/main.cpp
+++ b/src/main.cpp
@ -4,5 +4,6 @@ int main(int argc, char* argv[])
 {
    int ret = 0;
    printf("ams string4 library tests.\n");
    return ret;
 }