copypaste

master
madrocketsci 5 days ago
parent 87ccfdf279
commit eb9ae4316f

Binary file not shown.

@ -4,11 +4,163 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <math.h> #include <math.h>
#include <string.h>
#include <stdarg.h>
#include <vector>
#include <string>
#include <limits>
#include <locale>
#include <amsmathutil25/amsmathutil25.hpp>
namespace ams namespace ams
{ {
//Bounded C-string copy: copies src into dest, where size is the capacity of dest.
//Returns a character count, or a negative code when dest or src is NULL.
//(Historically a wrapper around strcpy_s / strncpy; the implementation is a
// portable hand-written loop.)
int amsstrcpy_s(char *dest, int size, const char *src);
//wraps the functions sprintf_s and snprintf in a portable manner
//between linux and microsoft standard C libraries.
//s is the destination buffer, n its capacity, format a printf-style format string.
int amssprintf_s(char *s, int n, const char *format, ...);
//Using the C library's sscanf function is more robust
//than atoi or atof. It returns valid numbers for infs and nans
//Returns nan for any uninterpretable string
double amsstrtonum(const char *str);
//Character type used throughout amsstring (currently plain char).
typedef char ams_chartype;
//Named character constants. Being static const at namespace scope in a header,
//each translation unit that includes this file gets its own copy.
static const ams_chartype ams_char_cr = (ams_chartype) '\r'; //carriage return
static const ams_chartype ams_char_lf = (ams_chartype) '\n'; //newline
static const ams_chartype ams_char_tb = (ams_chartype) '\t'; //tab
static const ams_chartype ams_char_nt = (ams_chartype) '\0'; //null terminator
//amsstring: a resizable character string that exposes its buffer (cstring)
//and its length directly as public members.
//NOTE(review): the closing line of this class reads "}; };" in this view; that looks
//like a side-by-side diff artifact - confirm against the real header.
class amsstring
{
public:
ams_chartype blank; // null terminator returned for accessing index out of bounds
ams_chartype *cstring; //character buffer; other code in this library writes a '\0' at cstring[length]
int length;
//length will be set to the length of the cstring not including the null terminating char
//Basic functions
//Construction, destruction, copy and assignment. Both const and non-const
//overloads are declared for amsstring and for raw character pointers.
amsstring();
~amsstring();
amsstring(amsstring &other);
amsstring& operator=(amsstring &other);
amsstring(const amsstring &other);
const amsstring& operator=(const amsstring &other);
amsstring(ams_chartype *other);
amsstring(const ams_chartype *other);
amsstring& operator=(ams_chartype *other);
const amsstring& operator=(const ams_chartype *other); //assign string constant to amsstring
//const amsstring& operator=(const ams_chartype *other) const; //assign string constant to amsstring
//const is a disease!
//
//amsstring(int length);
//amsstring(int length, const ams_chartype initchar);
//Size management and element access.
//NOTE(review): per the comment on 'blank', out-of-range indices presumably return a
//reference to 'blank' rather than faulting - confirm in the implementation.
int resize(const int newlen);
int size() const;
ams_chartype& operator[](const int ind);
const ams_chartype& operator[](const int ind) const;
ams_chartype& at(const int ind);
const ams_chartype& at(const int ind) const;
void clear();
void setall(const ams_chartype val, const int newlen);
void shrinktofit();
//string comparisons
bool operator==(const amsstring &other) const;
bool operator==(const char *other) const;
bool operator!=(const amsstring &other) const;
bool operator!=(const char *other) const;
//string ordering comparison
//alphabetizes strings by ASCII character
//longer strings compare larger than shorter ones
bool operator<(const amsstring &other) const;
bool operator>(const amsstring &other) const;
bool operator<(const ams_chartype *other) const;
bool operator>(const ams_chartype *other) const;
//Insert, Remove, and Substring
//insert() takes its amsstring argument by value (a copy is made per call).
void insert(const int ind, const amsstring other);
void insert(const int ind, const ams_chartype *other);
void remove(const int ind);
void remove(const int ind1, const int ind2);
//substring writes its result into *sout; the tests call it with sout aliasing this.
void substring(const int ind1, const int ind2, amsstring *sout) const;
//Append
void append(const amsstring &other);
void append(const ams_chartype *other);
void append(const ams_chartype other);
//Concatenation operators, declared in const and non-const forms for
//amsstring, C-string and single-character right-hand sides.
amsstring operator+(const amsstring &other);
const amsstring operator+(const amsstring &other) const;
amsstring operator+(const ams_chartype *other);
const amsstring operator+(const ams_chartype *other) const;
amsstring operator+(const ams_chartype other);
const amsstring operator+(const ams_chartype other) const;
//Find
//indstart: index at which the search begins; casesens: nonzero for a case-sensitive search.
//NOTE(review): the not-found return value is not visible here - confirm in the implementation.
int find(const amsstring findstr, const int indstart=0, const bool casesens=1) const;
int find(const ams_chartype *findstr, const int indstart=0, const bool casesens=1) const;
int find(const ams_chartype c, const int indstart=0, const bool casesens=1) const;
//formatted input
//printf-style formatting into this string; bufflen is presumably the size of the
//formatting buffer (TODO confirm).
int sprintf(int bufflen, const ams_chartype *formatstring, ...);
void tolower();
void toupper();
//Numeric interpretation of the string contents.
bool isvalidnumber();
double strtonum();
}; };
//needs work
//Free-function string utilities. Every splitting routine is declared twice:
//once filling a std::vector<amsstring> and once filling an ams::amsarray<amsstring>.
//splitlines: break *s into lines, results stored in *lns.
void splitlines(amsstring *s, std::vector<amsstring> *lns);
void splitlines(amsstring *s, ams::amsarray<amsstring> *lns);
//split: break *s at a delimiter given as a single character, a C string,
//or an amsstring; results stored in *lns.
void split(amsstring *s, const ams_chartype delimitchar, std::vector<amsstring> *lns);
void split(amsstring *s, const ams_chartype delimitchar, ams::amsarray<amsstring> *lns);
void split(amsstring *s, const ams_chartype *delimitstr, std::vector<amsstring> *lns);
void split(amsstring *s, const ams_chartype *delimitstr, ams::amsarray<amsstring> *lns);
void split(amsstring *s, amsstring *delimitstr, std::vector<amsstring> *lns);
void split(amsstring *s, amsstring *delimitstr, ams::amsarray<amsstring> *lns);
//splits a string, not counting whitespaces between non-whitespace characters
void splitwhitespace(amsstring *s, std::vector<amsstring> *lns);
void splitwhitespace(amsstring *s, ams::amsarray<amsstring> *lns);
//removes all whitespace characters '\t','\r','\n' included
//to the left and right of the string (but not in the middle)
void stripwhitespace(amsstring *s);
//completely removes all whitespace entirely
void stripallwhitespace(amsstring *s);
//File helpers operating on an already-open FILE*.
//NOTE(review): exact end-of-line and end-of-file handling is defined in the
//implementation file, which is not visible here.
void freadline(FILE *fp, amsstring *s);
void freadlines(FILE *fp, std::vector<amsstring> *lines);
void fwritelines(FILE *fp, amsstring *s);
void fwritelines(FILE *fp, std::vector<amsstring> *lines);
void freadtxtfile(FILE *fp, amsstring *s);
}; //end namespace ams
#include <amsstring4/amsstring4_unicode.hpp>
#include <amsstring4/amsstring4_bintextencoding.hpp>
#include <amsstring4/amsstring4_tests.hpp>
#endif #endif

@ -0,0 +1,28 @@
#ifndef __AMSSTRING4_BINTEXTENCODING_HPP__
#define __AMSSTRING4_BINTEXTENCODING_HPP__
namespace ams
{
//Base64 (RFC 4648 alphabet, '=' padding) conversion between byte arrays and text.
//base64encode: encodes *bytes into *str.
void base64encode(ams::amsarray<uint8_t> *bytes, amsstring *str);
//base64decode: dispatches to base64decode_strict when bstrict is true,
//otherwise to base64decode_liberal.
int base64decode(amsstring *str, ams::amsarray<uint8_t> *bytes, bool bstrict);
//decodes, ignoring (as in MIME spec) all characters that are not
//valid b64 alphabet chars, and all padding until the end of the string
int base64decode_liberal(amsstring *str, ams::amsarray<uint8_t> *bytes);
//Only processes strings of length divisible by 4, with
//expected 0,1,2 padding chars at the end of the string,
//and no non-coding characters.
int base64decode_strict(amsstring *str, ams::amsarray<uint8_t> *bytes);
//Console test drivers for the codec.
void test_base64encode();
void test_base64encode_fuzztest();
};
#endif

@ -0,0 +1,27 @@
#ifndef __AMSSTRING4_TESTS_HPP__
#define __AMSSTRING4_TESTS_HPP__
namespace ams
{
//Console test drivers for the string library. Each takes no arguments,
//returns nothing, and reports by printing to stdout.
//(The amsstring3_ prefix is carried over from the previous library version.)
void amsstring3_basic_string_test1();
void amsstring3_sscanf_test1();
void amsstring3_basic_string_test2();
void amsstring3_memoryleakcheck1();
void amsstring3_memoryleakcheck2();
void amsstring3_stringtests2();
void amsstring3_test_find();
void amsstring3_test_splitlines();
void amsstring3_test_split();
void amsstring3_test_strip();
void amsstring3_test_freadwrite();
void amsstring3_test_concatenation_operators();
};
#endif

@ -0,0 +1,22 @@
#ifndef __AMSSTRING4_UNICODE_HPP__
#define __AMSSTRING4_UNICODE_HPP__
namespace ams
{
//Conversion between UTF-8 encoded amsstrings and arrays of Unicode code points.
//string_to_uccodepoints returns 1 when the input decoded cleanly and 0 when
//malformed UTF-8 was encountered. Reference and pointer overloads are provided.
int string_to_uccodepoints(const amsstring &str, amsarray<uint32_t> &codepoints);
int string_to_uccodepoints(const amsstring *str, amsarray<uint32_t> *codepoints);
//Inverse conversion: code points back into an amsstring.
void uccodepoints_to_string(const amsarray<uint32_t> &codepoints, amsstring &str);
void uccodepoints_to_string(const amsarray<uint32_t> *codepoints, amsstring *str);
//Console test drivers for the conversions.
void test_unicode_ascii_int_conv();
void test_unicode_conv1();
void test_unicode_conv2();
};
#endif

@ -0,0 +1,565 @@
#include <amsstring4/amsstring4.hpp>
namespace ams
{
//PGP / GPG text armor, binary encoding scheme:
//
//HTML embedded image file binary encoding scheme:
//example:
// <img alt="Embedded Image" width="158" height="158"
// src="..." />
// ref: https://stackoverflow.com/questions/11474346/how-to-encode-images-within-html
// Embed other stuff!
// Data URIs can potentially store any type of data, not just images! Try these examples on for size: (X)HTML CSS Embedding Example
// <link rel="stylesheet" type="text/css"
// href="data:text/css;base64,LyogKioqKiogVGVtcGxhdGUgKioq..." />
// (X)HTML Javascript Embedding Example
// <script type="text/javascript"
// src="data:text/javascript;base64,dmFyIHNjT2JqMSA9IG5ldyBzY3Jv..."></script>
//I think these are both base-64 encodings of a binary blob
//ref: https://security.stackexchange.com/questions/142043/how-are-pgp-messages-constructed
//also widely used for email attachments
//Base64: 3 bytes (24-bits) 11111111 22222222 33333333
// converted to 4 base-64 digits
// 111111 112222 222233 333333
// subtleties on termination of a string of bytes that doesn't divide by 3
//real reference:
// https://datatracker.ietf.org/doc/html/rfc4648#section-4
// Table 1: The Base 64 Alphabet
// Value Encoding Value Encoding Value Encoding Value Encoding
// 0 A 17 R 34 i 51 z
// 1 B 18 S 35 j 52 0
// 2 C 19 T 36 k 53 1
// 3 D 20 U 37 l 54 2
// 4 E 21 V 38 m 55 3
// 5 F 22 W 39 n 56 4
// 6 G 23 X 40 o 57 5
// 7 H 24 Y 41 p 58 6
// 8 I 25 Z 42 q 59 7
// 9 J 26 a 43 r 60 8
// 10 K 27 b 44 s 61 9
// 11 L 28 c 45 t 62 +
// 12 M 29 d 46 u 63 /
// 13 N 30 e 47 v
// 14 O 31 f 48 w (pad) =
// 15 P 32 g 49 x
// 16 Q 33 h 50 y
//ord is 0-63, 64 for '='
//Maps a base-64 ordinal to its character in the RFC 4648 alphabet.
//  ord 0-63 : the corresponding alphabet character
//  ord 64   : the pad character '='
//  other    : '\0' (out-of-range marker)
ams_chartype base64_char(int8_t ord)
{
    //position k holds the character for ordinal k; position 64 is the pad character
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/=";
    const int index = (int)ord;
    if(index<0 || index>64)
    {
        return '\0'; //NULL character is for an out of range ord
    }
    return (ams_chartype)alphabet[index];
}
//Inverse of base64_char: maps a character to its base-64 ordinal.
//Returns 0-63 for alphabet characters, 64 for the pad character '=',
//and -1 for any character that is not part of the encoding.
//The result is held in a signed int8_t throughout, so the -1 sentinel does not
//depend on narrowing an unsigned 255 back to a signed value.
int8_t base64_ord(ams_chartype ch)
{
    //compare as unsigned so characters >= 0x80 can never alias a valid range
    const uint8_t ucval = (unsigned char) ch;
    int8_t ret = -1;
    if(ucval>=65 && ucval<91)
    {
        ret = (int8_t)(ucval-65);       //'A'-'Z' -> 0-25
    }
    else if(ucval>=97 && ucval<123)
    {
        ret = (int8_t)(ucval-97+26);    //'a'-'z' -> 26-51
    }
    else if(ucval>=48 && ucval<58)
    {
        ret = (int8_t)(ucval-48+52);    //'0'-'9' -> 52-61
    }
    else if(ch=='+')
    {
        ret = 62;
    }
    else if(ch=='/')
    {
        ret = 63;
    }
    else if(ch=='=')
    {
        ret = 64;                       //pad character
    }
    return ret;
}
//Encodes one full group of 3 input bytes into 4 output characters.
//bytes must point at 3 readable bytes, chars at 4 writable characters.
//Bit layout: 11111111 22222222 33333333 -> 111111 112222 222233 333333
static void b64_encode_3byteblock(uint8_t *bytes, ams_chartype *chars)
{
    const uint8_t sextet[4] = {
        (uint8_t)(bytes[0]>>2),
        (uint8_t)(((bytes[0] & 0x03)<<4) | (bytes[1]>>4)),
        (uint8_t)(((bytes[1] & 0x0F)<<2) | (bytes[2]>>6)),
        (uint8_t)(bytes[2] & 0x3F)
    };
    for(int k=0;k<4;k++)
    {
        chars[k] = base64_char(sextet[k]);
    }
}
//Encodes a trailing group of 2 input bytes into 3 characters plus one '=' pad.
//bytes must point at 2 readable bytes, chars at 4 writable characters.
static void b64_encode_2byteblock(uint8_t *bytes, ams_chartype *chars)
{
    const uint8_t sextet[3] = {
        (uint8_t)(bytes[0]>>2),
        (uint8_t)(((bytes[0] & 0x03)<<4) | (bytes[1]>>4)),
        (uint8_t)((bytes[1] & 0x0F)<<2)   //low two bits are zero fill
    };
    for(int k=0;k<3;k++)
    {
        chars[k] = base64_char(sextet[k]);
    }
    chars[3] = '=';
}
//Encodes a trailing single input byte into 2 characters plus two '=' pads.
//bytes must point at 1 readable byte, chars at 4 writable characters.
static void b64_encode_1byteblock(uint8_t *bytes, ams_chartype *chars)
{
    const uint8_t high = (uint8_t)(bytes[0]>>2);
    const uint8_t low  = (uint8_t)((bytes[0] & 0x03)<<4); //low four bits are zero fill
    chars[0] = base64_char(high);
    chars[1] = base64_char(low);
    chars[2] = '=';
    chars[3] = '=';
}
//Decodes one group of 4 base-64 characters into up to 3 bytes.
//A '=' in position 1, 2 or 3 reduces the number of bytes written by one each
//and contributes zero bits. No validation of the characters is done here.
//Always returns 1.
//Bit layout: 111111 222222 333333 444444 -> 11111122 22223333 33444444
static int decode_4charblock(ams_chartype *chars, uint8_t *bytes)
{
    uint8_t ord[4];
    for(int k=0;k<4;k++)
    {
        ord[k] = base64_ord(chars[k]);
    }
    int nbytes = 3;
    for(int k=3;k>=1;k--)
    {
        if(chars[k]=='=')
        {
            nbytes--;
            ord[k] = 0;
        }
    }
    if(nbytes>0)
    {
        const uint8_t decoded[3] = {
            (uint8_t)((ord[0]<<2) | ((ord[1] & 0x30)>>4)),
            (uint8_t)(((ord[1] & 0x0F)<<4) | ((ord[2] & 0x3C)>>2)),
            (uint8_t)(((ord[2] & 0x03)<<6) | (ord[3] & 0x3F))
        };
        for(int k=0;k<nbytes;k++)
        {
            bytes[k] = decoded[k];
        }
    }
    return 1;
}
//Encodes the contents of *bytes as base-64 text in *str.
//The output length is 4 characters per (possibly partial) group of 3 input bytes;
//a partial final group is padded with '='.
void base64encode(ams::amsarray<uint8_t> *bytes, amsstring *str)
{
    const bool wholegroups = (bytes->length % 3 == 0);
    if(wholegroups)
    {
        str->resize((bytes->length/3)*4);
    }
    else
    {
        str->resize((bytes->length/3+1)*4);
    }

    long in = 0;   //read position in bytes->data
    long out = 0;  //write position in str->cstring

    //all complete 3-byte groups
    for(; bytes->length-in>=3; in+=3, out+=4)
    {
        b64_encode_3byteblock(&(bytes->data[in]),&(str->cstring[out]));
    }

    //at most one partial group remains
    if(bytes->length-in==2)
    {
        b64_encode_2byteblock(&(bytes->data[in]),&(str->cstring[out]));
    }
    else if(bytes->length-in==1)
    {
        b64_encode_1byteblock(&(bytes->data[in]),&(str->cstring[out]));
    }

    str->cstring[str->length] = '\0';
}
//Bounded string length: returns the index of the first '\0' found within the
//first str->length characters, or str->length when none of them is '\0'.
//(Previously a string with no embedded terminator reported length 0.)
//Returns 0 for a NULL string or a NULL buffer.
static long __intl_localstrlen(amsstring *str)
{
    if(str==NULL || str->cstring==NULL)
    {
        return 0;
    }
    for(long I=0;I<str->length;I++)
    {
        if(str->cstring[I]=='\0')
        {
            return I;
        }
    }
    return str->length;
}
//Only processes strings of length divisible by 4, with
//expected 0,1,2 padding chars at the end of the string,
//and no non-coding characters.
//Strict base-64 decoder.
//Accepts only strings whose length is a multiple of 4, that consist solely of
//alphabet characters, and that carry 0, 1 or 2 '=' pad characters at the very end.
//
//Return value (bytes is emptied on every failure):
//   1 : success (an empty string decodes to an empty array)
//  -1 : length is not a multiple of 4
//  -2 : malformed padding (three pads, or a pad followed by a non-pad)
//  -3 : a non-coding character, or a pad outside the last two positions
int base64decode_strict(amsstring *str, ams::amsarray<uint8_t> *bytes)
{
    if(str->length == 0)
    {
        bytes->resize(0);
        return 1;
    }
    if(str->length % 4 !=0)
    {
        bytes->resize(0);
        return -1; //invalid length
    }

    const bool padlast   = (str->cstring[str->length-1]=='=');
    const bool padsecond = (str->cstring[str->length-2]=='=');
    const bool padthird  = (str->cstring[str->length-3]=='=');
    if(padthird || (padsecond && !padlast))
    {
        //three pads, or "x=y" at the end where data follows a pad
        bytes->resize(0);
        return -2; //unexpected number / arrangement of padding chars
    }

    //Validate the whole string before decoding anything. The ordinal is kept
    //signed so the -1 "not a coding character" sentinel is actually testable.
    for(long I=0;I<str->length;I++)
    {
        const int8_t v = base64_ord(str->cstring[I]);
        const bool ispad = (v==64);
        if(v<0 || v>64 || (ispad && I<str->length-2))
        {
            bytes->resize(0);
            return -3; //invalid char encountered
        }
    }

    //resize bytes to indicated size: 3 per group, less one per pad character
    long len = (str->length/4)*3;
    if(padlast) len--;
    if(padsecond) len--;
    bytes->resize(len);

    long J = 0;
    for(long I=0;I<str->length;I+=4)
    {
        decode_4charblock(&(str->cstring[I]),&(bytes->data[J]));
        J = J + 3;
    }
    return 1;
}
//decodes, ignoring (as in MIME spec) all characters that are not
//valid b64 alphabet chars, and all padding until the end of the string
//Liberal base-64 decoder.
//Filters the input down to alphabet characters (everything else, including any
//'=' padding and whitespace, is ignored; scanning stops at the first '\0'),
//re-creates the padding, and hands the result to base64decode_strict.
//
//A filtered length of 4k+1 is not a valid encoding: the dangling character
//carries only 6 bits and cannot form a byte. It is dropped, the complete groups
//before it are still decoded, and 0 is returned to flag the input as malformed.
//(Previously this case was padded with "===", which the strict decoder always
// rejects, so every decoded byte was discarded and the 0 flag was overwritten.)
//
//Return value: 1 on success, 0 when a dangling character was dropped,
//otherwise the negative code reported by base64decode_strict.
int base64decode_liberal(amsstring *str, ams::amsarray<uint8_t> *bytes)
{
    amsstring str2;
    long J = 0;
    bool dangling = false;

    //room for every input character plus up to 2 pads and the terminator
    str2.resize(str->length+4);

    //filter chars in str to remove all non-coding chars
    for(long I=0;I<str->length;I++)
    {
        const ams_chartype ch = str->cstring[I];
        if(ch=='\0')
            break;
        const int8_t v = base64_ord(ch);
        if(v>=0 && v<64)
        {
            str2.cstring[J] = ch;
            J = J + 1;
        }
    }

    if(J%4==1)
    {
        dangling = true; //this isn't really valid
        J--;             //drop the character that cannot form a byte
    }
    else if(J%4==2)
    {
        str2.cstring[J] = '='; J++;
        str2.cstring[J] = '='; J++;
    }
    else if(J%4==3)
    {
        str2.cstring[J] = '='; J++;
    }

    str2.resize(J);
    str2.cstring[str2.length] = '\0';
    str2.shrinktofit();

    int ret = base64decode_strict(&str2,bytes);
    if(dangling && ret==1)
    {
        ret = 0;
    }
    return ret;
}
//Test helper: copies the characters of *s into the byte array *b
//(b is resized to s->length; the terminator is not copied).
static void _intl_convsb(amsstring *s, amsarray<uint8_t> *b)
{
    b->resize(s->length);
    long pos = 0;
    while(pos<s->length)
    {
        b->at(pos) = (unsigned char) s->cstring[pos];
        pos++;
    }
}
//Test helper: copies the byte array *b into the string *s
//(s is resized to b->length and null terminated).
static void _intl_convbs(amsarray<uint8_t> *b,amsstring *s)
{
    s->resize(b->length);
    long pos = 0;
    while(pos<b->length)
    {
        s->cstring[pos] = b->data[pos];
        pos++;
    }
    s->cstring[s->length] = '\0';
}
void test_base64encode1()
{
int I;
int8_t o1,o2;
ams_chartype c1,c2;
for(I=-5;I<70;I++)
{
o1 = I;
c1 = base64_char(o1);
o2 = base64_ord(c1);
c2 = base64_char(o2);
printf("%d %d %c %d %c\n",(int)I,(int)o1,c1,(int)o2,c2);
}
return;
}
//segmentation faults in decode_liberal
//Console test: encodes three sample strings (lengths 11, 10 and 9, i.e. one of each
//padding case) and decodes them again, printing original, encoded and decoded text
//together with the decoder's return code.
//All three decodes run with bstrict = 1. The third sample has non-coding characters
//inserted into the encoded text before decoding, so the strict decoder sees them.
void test_base64encode()
{
printf("Tests of base64 encoding/deconding.\n");
amsstring s1,s1e,s2;
amsarray<uint8_t> b1,b2;
int ret = 0;
bool bstrict = 1;
//case 1: 11 bytes -> final group of 2 bytes, one pad character
s1 = "light work.";
_intl_convsb(&s1,&b1);
base64encode(&b1,&s1e);
ret = base64decode(&s1e,&b2,bstrict);
_intl_convbs(&b2,&s2);
// for(I=0;I<b1.length;I++)
// printf("%d:",b1.at(I));
// printf("\n");
printf("Original: '%s'\n",s1.cstring);
printf("Encoded: '%s'\n",s1e.cstring);
printf("Decoded: '%s', ret=%d\n",s2.cstring,ret);
//case 2: 10 bytes -> final group of 1 byte, two pad characters
s1 = "light work";
_intl_convsb(&s1,&b1);
base64encode(&b1,&s1e);
ret = base64decode(&s1e,&b2,bstrict);
_intl_convbs(&b2,&s2);
// for(I=0;I<b1.length;I++)
// printf("%d:",b1.at(I));
// printf("\n");
printf("Original: '%s'\n",s1.cstring);
printf("Encoded: '%s'\n",s1e.cstring);
printf("Decoded: '%s', ret=%d\n",s2.cstring,ret);
//case 3: 9 bytes -> no padding; newline, tab and '}' are inserted into the
//encoded text to exercise handling of non-coding characters
s1 = "light wor";
_intl_convsb(&s1,&b1);
base64encode(&b1,&s1e);
s1e.insert(3,"\n");
s1e.insert(5,"\t");
s1e.insert(7,"}");
//s1e.insert(1,"}");
ret = base64decode(&s1e,&b2,bstrict);
//ret = base64decode_strict(&s1e,&b2);
_intl_convbs(&b2,&s2);
// for(I=0;I<b1.length;I++)
// printf("%d:",b1.at(I));
// printf("\n");
printf("Original: '%s'\n",s1.cstring);
printf("Encoded: '%s'\n",s1e.cstring);
printf("Decoded: '%s', ret=%d\n",s2.cstring,ret);
return;
}
//Round-trip fuzz test: for each length 0..99, fills a byte array with random
//values, then encodes, strictly decodes and re-encodes it. A trial passes when
//both the bytes and the two encodings agree. Prints a pass/fail tally.
void test_base64encode_fuzztest()
{
    ams::amsarray<uint8_t> bytes,bytes2;
    amsstring str,str2;
    int passed = 0;
    int failed = 0;
    for(long trial=0;trial<100;trial++)
    {
        printf("Test %d\n",(int)trial);
        bytes.resize(trial);
        for(long k=0;k<bytes.size();k++)
        {
            bytes.data[k] = randd()*255;
        }
        base64encode(&bytes,&str);
        base64decode(&str,&bytes2,1);
        base64encode(&bytes2,&str2);
        const bool roundtrip_ok = (bytes==bytes2 && str==str2);
        if(!roundtrip_ok)
        {
            failed++;
            continue;
        }
        passed++;
    }
    printf("passed: %d, failed %d\n",passed,failed);
}
//Decodes base-64 text in *str into *bytes.
//bstrict selects base64decode_strict (true) or base64decode_liberal (false);
//the selected decoder's return code is passed through unchanged.
int base64decode(amsstring *str, ams::amsarray<uint8_t> *bytes, bool bstrict=0)
{
    if(bstrict==1)
    {
        return base64decode_strict(str,bytes);
    }
    return base64decode_liberal(str,bytes);
}
};

File diff suppressed because it is too large Load Diff

@ -0,0 +1,177 @@
#include <amsstring4/amsstring4.hpp>
namespace ams
{
//snprintf, vsnprintf should now be part of the C++ standard library
//as of C++11, so I don't think I need quite as elaborate a compatibility
//shim as in the previous library.
//It *should* compile with MinGW and Visual Studio.
//src must be a NULL terminated string, or have more indices than the size of the destination buffer
//I'm seeing platform specific memory leaks in the strncpy and strcpy_s implementations in valgrind
//This is a dirt simple function, I shouldn't *need* to depend on a platform specific implementation,
//... so, writing my own.
//
//This function copies the string src to dest.
//It stops when either size-1 characters have been copied to
//dest, or a null terminator has been encountered in src.
//
//The return value is the number of characters copied, excluding the null terminator
//or an error code.
//
//All additional positions in dest are padded with null terminators. Size is intended to be the size
//of the dest buffer.
//Bounded string copy.
//Copies src into dest until size-1 characters have been copied or the end of
//src is reached; every remaining position of dest, up to size, is set to '\0',
//so dest is always null terminated.
//
//  dest : destination buffer of at least size characters
//  size : capacity of dest, including room for the terminator
//  src  : null-terminated source string
//
//Returns the number of characters copied (excluding the terminator), or
//  -1 if src is NULL
//  -2 if dest is NULL
//  -3 if size is not positive (nothing is written)
int amsstrcpy_s(char *dest, int size, const char *src)
{
    if(dest==NULL)
    {
        return -2;
    }
    if(src==NULL)
    {
        return -1;
    }
    if(size<=0)
    {
        //no room even for a terminator; writing dest[size-1] would be out of bounds
        return -3;
    }

    int ncopied = 0;
    while(ncopied<size-1 && src[ncopied]!='\0')
    {
        dest[ncopied] = src[ncopied];
        ncopied++;
    }
    //pad the rest of the buffer, which also writes the terminator
    for(int I=ncopied;I<size;I++)
    {
        dest[I] = '\0';
    }
    return ncopied;
}
//wrapper for strcpy_s and strncpy which should be portable between gnu and microsoft C libraries
//strcpy_s
//strncpy
// int amsstrcpy_s(char *dest, int size, const char *src)
// {
// int ret = 0;
// if(dest!=NULL)
// {
// if(src!=NULL)
// {
// #if defined(LINUX) || defined(linux) || defined(__linux__) || defined(__GNUC__)
// //use strncpy
// strncpy(dest,src,size);
// ret = 0;
// if(size>0)
// {
// dest[size-1] = '\0';
// }
// #elif defined(__MINGW32__) || defined(__MINGW64__) || defined(_WIN32)
// //use strcpy_s
// //ret = (int)strcpy_s(dest,size,src);
// strcpy_s(dest,size,src);
// ret = 0;
// if(size>0)
// {
// dest[size-1] = '\0';
// }
// #else
// #pragma message("amsstrcpy_s: Unsupported architecture - neither linux nor mingw64 nor msvc")
// #endif
// }
// else
// {
// if(size>0)
// {
// dest[0] = '\0';
// ret = -2; //src was NULL
// }
// }
// }
// else
// {
// ret = -1; //dest was a null pointer
// }
// return ret;
// }
//sprintf_s
//snprintf
//
//Portable bounded printf into a character buffer.
//
//  s      : destination buffer
//  n      : capacity of s in characters, including the terminator
//  format : printf-style format string, followed by its arguments
//
//Returns the value reported by the underlying C library formatter
//(vsnprintf on GNU-compatible toolchains, vsprintf_s on Microsoft ones).
//Returns 0 without writing anything when s is NULL.
//Returns -1 when n is not positive (nothing written) or when format is NULL
//(s is set to the empty string); both cases previously reached the C library
//or wrote s[n-1] out of bounds.
int amssprintf_s(char *s, int n, const char *format, ...)
{
    if(s==NULL)
    {
        return 0;
    }
    if(n<=0)
    {
        return -1;
    }
    if(format==NULL)
    {
        s[0] = '\0';
        return -1;
    }

    int ret = 0;
    va_list args;
    va_start(args, format);
#if defined(LINUX) || defined(linux) || defined(__linux__) || defined(__GNUC__)
    //use snprintf
    ret = (int)vsnprintf(s,n,format,args);
    s[n-1] = '\0'; //belt and braces: guarantee termination after truncation
#elif defined(__MINGW32__) || defined(__MINGW64__) || defined(_WIN32)
    //use sprintf_s
    ret = (int)vsprintf_s(s,n,format,args);
#else
#pragma message("amssprintf_s: Unsupported architecture - neither linux nor mingw64 nor msvc")
#endif
    va_end(args);
    return ret;
}
//Using the C library's sscanf function is more robust
//than atod or atof. It returns valid numbers for infs and nans
//Returns nan for any uninterpretable string
//Parses a floating point number from the start of str using sscanf("%lf").
//Leading whitespace is skipped and trailing text after the number is ignored;
//"inf", "-inf" and "nan" are interpreted by the C library.
//Returns quiet NaN when str is NULL, empty, or does not begin with a number.
double amsstrtonum(const char *str)
{
    const double nan = std::numeric_limits<double>::quiet_NaN();
    if(str==NULL)
    {
        return nan;
    }
    double value = nan;
    //sscanf reports the number of converted fields: 1 on success,
    //0 on a matching failure, EOF when the input ends before any conversion.
    //It does not throw, so no exception handling is needed.
    if(sscanf(str,"%lf",&value)!=1)
    {
        return nan;
    }
    return value;
}
};

@ -0,0 +1,475 @@
#include <amsstring4/amsstring4.hpp>
namespace ams
{
//Prints every integer from -127 to 255 reduced to an unsigned char, both as a
//character and as two hex digits, then prints a line containing two carriage
//returns (labelled "LF" in the output text).
void amsstring3_basic_string_test1()
{
    printf("Basic string tests1.\n");
    for(int code=-127;code<256;code++)
    {
        const unsigned char byte = (unsigned char) code;
        printf("I=%d, %c, %02x\n",code,byte,byte);
    }
    const unsigned char cr = (unsigned char) '\r';
    printf("\nLF: %c After LF %c After LF2 \n",cr,cr);
}
//Console test for amssprintf_s / amsstrtonum: formats a series of sample
//strings into a 500-character buffer, parses each with amsstrtonum and prints
//the string together with the parsed value.
//The last three cases deliberately pass degenerate arguments to amssprintf_s:
//a capacity of 2 (truncation), a NULL format string, and a NULL destination.
//In the NULL-destination case buf still holds the previous contents when printed.
void amsstring3_sscanf_test1()
{
char buf[500];
double d;
int q;
// //vsnprintf(buf,500," -123.456E10 ");
// snprintf(buf,500," -123.456E10 ");
// //sprintf_s(buf,500," -123.456E10 ");
// q=sscanf(buf,"%lf",&d);
// printf("Buffer %s reads as %1.4g, q=%d\n",buf,d,q);
// snprintf(buf,500," -inf ");
// //sprintf_s(buf,500," -123.456E10 ");
// q=sscanf(buf,"%lf",&d);
// printf("Buffer %s reads as %1.4g, q=%d\n",buf,d,q);
// snprintf(buf,500,"3");
// //sprintf_s(buf,500," -123.456E10 ");
// q=sscanf(buf,"%lf",&d);
// printf("Buffer %s reads as %1.4g, q=%d\n",buf,d,q);
// snprintf(buf,500," #.QUAN0 ");
// //sprintf_s(buf,500," -123.456E10 ");
// q=sscanf(buf,"%lf",&d);
// printf("Buffer %s reads as %1.4g, q=%d\n",buf,d,q);
// snprintf(buf,500,"nan");
// //sprintf_s(buf,500," -123.456E10 ");
// q=sscanf(buf,"%lf",&d);
// printf("Buffer %s reads as %1.4g, q=%d\n",buf,d,q);
//plain integers, with and without surrounding whitespace
amssprintf_s(buf,500,"-3");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
amssprintf_s(buf,500," -3 ");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
amssprintf_s(buf,500,"\t\t-3\t\n ");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
//exponent notation
amssprintf_s(buf,500," +3E+1 ");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
//number followed by other text
amssprintf_s(buf,500,"2,3,4");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
//special values
amssprintf_s(buf,500,"inf");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
amssprintf_s(buf,500,"-inf");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
amssprintf_s(buf,500,"nan");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
amssprintf_s(buf,500,"1.0*4E3");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
//degenerate arguments: tiny capacity, NULL format, NULL destination
amssprintf_s(buf,2,"2,3,4");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
amssprintf_s(buf,2,NULL);
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
amssprintf_s(NULL,2,"100");
d = amsstrtonum(buf);
printf("String %s reads as %1.4g\n",buf,d);
}
//Console test for basic amsstring behaviour: assignment, indexing with
//out-of-range indices (from -5 to size()+4), writing through operator[],
//equality after resize / shrinktofit, and the ordering operators.
//Results are printed for visual inspection; nothing is asserted.
void amsstring3_basic_string_test2()
{
amsstring s1,s2;
//const amsstring s3; //don't do this - just accept that strings must be mutable
s1="Hello world";
s2 = s1;
printf("%d %c\n",(ams_chartype) '\0', (ams_chartype) '\0');
printf("s1: '%s', s2: '%s'\n",s1.cstring,s2.cstring);
int I;
//read s2 at indices that run past both ends of the string
for(I=-5;I<s2.size()+5;I++)
{
printf("s2[%d]: %d, %c \n",I,s2[I],s2[I]);
}
//write 'a' to s1 over the same index range, including the out-of-range indices
for(I=-5;I<s2.size()+5;I++)
{
s1[I] = 'a';
};
printf("s1 = %s\n",s1.cstring);
for(I=-5;I<s2.size()+5;I++)
{
printf("s1[%d]: %d, %c \n",I,s1[I],s1[I]);
}
printf("s1:'%s'==s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1==s2);
//equality of two strings with the same text
s1 = "Hello";
s2 = "Hello";
printf("s1:'%s'==s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1==s2);
//equality after s2 is resized to 15 and modified at index 10
s2.resize(15);
s2[10] = 'b';
//s2[5] = 'b';
//s2[6] = 'b';
printf("s1.size()=%d, s2.size()=%d\n",s1.size(),s2.size());
printf("s1:'%s'==s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1==s2);
//equality again after shrinktofit
s2[10] = 'a';
s2.shrinktofit();
printf("s1.size()=%d, s2.size()=%d\n",s1.size(),s2.size());
printf("s1:'%s'==s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1==s2);
//ordering operators on strings differing only in case, then on equal strings
s1 = "hello"; s2 = "Hello";
printf("s1:'%s'>s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1>s2);
s1 = "hello"; s2 = "Hello";
printf("s1:'%s'<s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1<s2);
s1 = "Hello"; s2 = "Hello";
printf("s1:'%s'>s2:'%s'?:%d\n",s1.cstring,s2.cstring,s1>s2);
//s3 = "Hello constant world.";
//printf("s3= '%s'\n",s3.cstring);
}
//Allocation stress test intended to be run under a memory checker.
//Part 1: repeatedly replaces a string with a substring of itself (source and
//        destination are the same object).
//Part 2: repeatedly assigns three strings to one another, resizing each to
//        ten million characters.
void amsstring3_memoryleakcheck1()
{
    amsstring q1,q2,q3;

    q1.sprintf(1000,"%1.100g,",ams::pi);
    printf("q1='%s'\n",q1.cstring);
    printf("q1.size()=%d\n",q1.size());
    q2 = q1;

    for(int pass=0;pass<100;pass++)
    {
        q1.substring(0,q1.length-1,&q1);
        printf("q1 = substr; q1='%s'\n",q1.cstring);
    }

    q1 = q2;
    q1.resize(10000000);
    for(int round=0;round<20;round++)
    {
        printf("resize test %d\n",(int)round);
        q2 = q1;
        q2.resize(10000000);
        q3 = q2;
        q3.resize(10000000);
        q1 = q3;
        q1.resize(10000000);
    }
}
//Allocation stress test: formats pi into the same string 100 times using a
//buffer length (4) far smaller than the formatted text, then prints the result.
void amsstring3_memoryleakcheck2()
{
    amsstring q1;
    int remaining = 100;
    while(remaining>0)
    {
        q1.sprintf(4,"%1.100g",ams::pi);
        remaining--;
    }
    printf("q1=%s\n",q1.cstring);
}
//Console test for amsstring editing and conversion members: insert, remove
//(single index and range), tolower/toupper, append, substring (including
//aliasing the output onto the source), isvalidnumber and strtonum.
//Index loops intentionally run from negative values to past the end of the string.
//Results are printed for visual inspection; nothing is asserted.
void amsstring3_stringtests2()
{
amsstring q1,q2;
int I;
amsarray<amsstring> qarr;
//insert at every position from -2 to 14
q1.insert(0,"Hello world");
printf("q1='%s'\n",q1.cstring);
for(I=-2;I<15;I++)
{
q2 = q1;
q2.insert(I,"<insert>");
printf("q2=q1;q2.insert(%d,'<insert'>) = '%s' size=%d\n",I,q2.cstring,q2.size());
}
//remove a single character at every position from -5 to 14
q1 = "Hello world.";
for(I=-5;I<15;I++)
{
q2 = q1;
q2.remove(I);
printf("q2.remove(%d) = '%s'\n",I,q2.cstring);
}
//remove the range [I, I+2] at every position from -5 to 14
for(I=-5;I<15;I++)
{
q2 = q1;
q2.remove(I,I+2);
printf("q2.remove(%d,%d) = '%s'\n",I,I+2,q2.cstring);
}
//case conversion, append, and substring written back onto the source
q1 = "Hello hEllo 1,2,3;";
printf("q1='%s'\n",q1.cstring);
q1.tolower();
printf("q1='%s'\n",q1.cstring);
q1.toupper();
printf("q1='%s'\n",q1.cstring);
q1.append("hello more appened stuff...");
printf("q1='%s'\n",q1.cstring);
q1.substring(-5,5,&q1);
printf("q1='%s'\n",q1.cstring);
//numeric interpretation of assorted strings
q1 = "Hello 1,2,3";
printf("q1='%s', q1.isvalidnumber() = %d, q1.strtonum=%1.6g\n",q1.cstring,q1.isvalidnumber(),q1.strtonum());
q1 = " 3.1415 ";
printf("q1='%s', q1.isvalidnumber() = %d, q1.strtonum=%1.6g\n",q1.cstring,q1.isvalidnumber(),q1.strtonum());
q1 = "-inf";
printf("q1='%s', q1.isvalidnumber() = %d, q1.strtonum=%1.6g\n",q1.cstring,q1.isvalidnumber(),q1.strtonum());
q1 = "nan";
printf("q1='%s', q1.isvalidnumber() = %d, q1.strtonum=%1.6g\n",q1.cstring,q1.isvalidnumber(),q1.strtonum());
q1 = "1.1E1,2.2E2";
printf("q1='%s', q1.isvalidnumber() = %d, q1.strtonum=%1.6g\n",q1.cstring,q1.isvalidnumber(),q1.strtonum());
q1 = ",1,2,3";
printf("q1='%s', q1.isvalidnumber() = %d, q1.strtonum=%1.6g\n",q1.cstring,q1.isvalidnumber(),q1.strtonum());
// qarr.resize(1000);
// for(I=0;I<1000;I++)
// {
// qarr[I].sprintf(1000,"%1.500g\n",ams::pi);
// qarr[I].resize(1000000);
// }
// printf("%d",qarr[0].size());
//substring [I, I+3] at every position from -2 to 14
q1 = "Hello world";
for(I=-2;I<15;I++)
{
q1.substring(I,I+3,&q2);
printf("q1[%d:%d] = '%s' size=%d\n",I,I+3,q2.cstring,q2.size());
}
return;
}
//Console test for amsstring::find in case-insensitive mode.
//For each of three search patterns, "<insert>" is inserted into "hello world"
//at every position from -1 to 12 and the index reported by find is printed.
void amsstring3_test_find()
{
    amsstring q1,q2,q3;
    const char *patterns[3] = {"<inser","<inSeRt>","<insert> "};

    q1 = "hello world";
    for(int p=0;p<3;p++)
    {
        q3 = patterns[p];
        //every pattern after the first is preceded by a blank line
        if(p==0)
        {
            printf("q3='%s'\n",q3.cstring);
        }
        else
        {
            printf("\nq3='%s'\n",q3.cstring);
        }
        for(int pos=-1;pos<13;pos++)
        {
            q2 = q1;
            q2.insert(pos,"<insert>");
            printf("q2=q1;q2.insert(%d,'<insert'>) = '%s' q2.find(q3)=%d\n",pos,q2.cstring,q2.find(q3,0,0));
        }
    }
}
void amsstring3_test_splitlines()
{
int I;
amsstring q1;
amsarray<amsstring> lns;
std::vector<amsstring> lns2;
q1 = "This is a \n string on \n multiple \r\n lines\n\n with CR\\LFs\n";
//q1 = "\n\n";
//q1 = "";
//q1 = "More malformed\r string nonsense\n\r\n\r\r\na";
printf("q1='%s'\n",q1.cstring);
splitlines(&q1,&lns2);
for(I=0;I<lns2.size();I++)
{
printf("Line %d: '%s'\n",I,lns2[I].cstring);
}
return;
}
//Console test for split (C-string delimiter overload) and splitwhitespace.
//Each case prints the input string and then every piece produced.
//Cases cover single-character and multi-character delimiters, a delimiter
//longer than the input, an empty input and an empty delimiter.
void amsstring3_test_split()
{
    //input string and delimiter for each split() case, in execution order
    const char *cases[6][2] = {
        {"this is a string to split "," "},
        {"A\tbunch of tab\tseparated \tvariables\t","\t"},
        {"Delimiter is abcd, a ab abcd qabcqdqabcdq","abcd"},
        {"abc","abcd"},
        {"","abcd"},
        {"A string not to split.",""}
    };

    amsstring q1;
    std::vector<amsstring> strs;
    int I;

    for(int c=0;c<6;c++)
    {
        q1 = cases[c][0];
        printf("string='%s'\n",q1.cstring);
        split(&q1,cases[c][1],&strs);
        for(I=0;I<strs.size();I++)
        {
            printf("S[%d]: '%s'\n",I,strs[I].cstring);
        }
    }

    //whitespace splitting treats runs of spaces and tabs as one separator
    q1 = "A string to split\tby\t \twhitespace";
    printf("string='%s'\n",q1.cstring);
    splitwhitespace(&q1,&strs);
    for(I=0;I<strs.size();I++)
    {
        printf("S[%d]: '%s'\n",I,strs[I].cstring);
    }
}
void amsstring3_test_strip()
{
amsstring q1;
std::vector<amsstring> strs;
int I;
q1 = " ";
printf("string ws ='%s'\n",q1.cstring);
stripwhitespace(&q1);
printf("string nows='%s'\n",q1.cstring);
q1 = "\t something = something else\t ";
printf("string ws ='%s'\n",q1.cstring);
stripwhitespace(&q1);
printf("string nows='%s'\n",q1.cstring);
stripwhitespace(NULL);
q1 = "\t something = something else\t ";
printf("string ws ='%s'\n",q1.cstring);
stripallwhitespace(&q1);
printf("string allws='%s'\n",q1.cstring);
return;
}
void amsstring3_test_freadwrite()
{
FILE *fp = NULL;
FILE *fp2 = NULL;
int I;
amsstring q;
std::vector<amsstring> q2;
fp = fopen("../ref/0p375_hexbolt.scad","r");
fp2 = fopen("../ref/testrewrite.scad","w+");
I = 0;
while(!feof(fp))
{
freadline(fp,&q);
printf("Line %d: '%s'\n",I,q.cstring);
I = I + 1;
}
fseek(fp,SEEK_SET,0);
freadlines(fp,&q2);
for(I=0;I<q2.size();I++)
{
printf("Line %d: '%s'\n",I,q2[I].cstring);
}
//q = "This is a test file\nto write\n\thello\n\n";
//fwritelines(fp2,&q);
fwritelines(fp2,&q2);
fclose(fp);
fclose(fp2);
return;
}
//Console test for operator+ : builds "hello world\n" by concatenating C strings
//onto an empty string, doubles and triples it by string + string, and prints
//the doubled result.
void amsstring3_test_concatenation_operators()
{
    ams::amsstring a,b,c,d;

    //string + C string
    a = "";
    a = a + "hello";
    a = a+ " world\n";

    //string + string
    b = a+a;
    c = b+a;

    printf("%s\n",b.cstring);
}
};

@ -0,0 +1,638 @@
#include <amsstring4/amsstring4.hpp>
namespace ams
{
//UC codepoints
//0x00 to 0x10FFFF (~24 bits, with the remainder being escape sequences and the like)
//different processors order bytes differently (endianness)
//UTF-8
// 0x00 - 0x7F: 1 byte
// 0x00: U+0000 - only when representing the null character
//21 bit values
// 0b0xxxxxxx 0x00000000 0x0000007F
// 0b110xxxxx 10xxxxxx 0x00000080 0x000007FF
// 0b1110xxxx 10xxxxxx 10xxxxxx 0x00000800 0x0000FFFF
// 0b11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 0x00010000 0x0010FFFF
//Debug helper: prints the 32 bits of q to stdout, most significant bit first,
//as four groups of eight '0'/'1' characters separated by single spaces.
//No trailing newline is printed.
static void _intl_print_ui32bits(uint32_t q)
{
    for(int bit=31;bit>=0;bit--)
    {
        //the mask is built as an unsigned value: shifting a signed 1 into
        //bit 31 overflows int
        const uint32_t mask = ((uint32_t)1)<<bit;
        if((q & mask)!=0)
        {
            printf("1");
        }
        else
        {
            printf("0");
        }
        //separator after each complete byte except the last
        if(bit%8==0 && bit!=0)
        {
            printf(" ");
        }
    }
    return;
}
//Prints the 8 bits of q to stdout as binary digits, most significant bit first.
//No separator or newline is printed.
static void _intl_print_ui8bits(uint8_t q)
{
    int bit = 8;
    while(bit-- > 0)
    {
        const int isset = (q>>bit) & 1;
        putchar(isset ? '1' : '0');
    }
}
//Decodes the UTF-8 bytes of str into Unicode codepoints.
//
// str        - source string. Bytes are read until the first null byte,
//              looking at no more than str.length+1 bytes.
// codepoints - decoded values are appended here, one entry per codepoint.
//              When the null byte is reached a terminating 0 entry is appended.
//
//Returns 1 if every byte was part of a well formed sequence, 0 otherwise.
//The following count as malformed:
//  - a byte inside a multi-byte sequence that is not a 10xxxxxx continuation byte
//  - a continuation byte or a 11111xxx byte outside of a sequence
//  - a multi-byte sequence cut short by the null terminator
//Malformed bytes and the partial sequence they interrupt are dropped, not appended.
//Overlong encodings and surrogate values are not checked for.
int string_to_uccodepoints(const amsstring &str, amsarray<uint32_t> &codepoints)
{
    long I;
    unsigned char c0;
    uint32_t cp0 = 0;   //codepoint currently being assembled
    uint32_t cpw;
    int escs = 0;       //continuation bytes still expected, 0 when outside a sequence
    int correct = 1;

    codepoints.reserve(str.length);

    for(I=0;I<str.length+1;I++)
    {
        c0 = (unsigned char)str.cstring[I];

        if(c0=='\0')
        {
            //end of string, terminate search
            if(escs>0)
            {
                //the terminator arrived in the middle of a multi-byte sequence
                correct = 0;
            }
            cp0 = 0;
            codepoints.append(cp0);
            break;
        }

        if(escs>0)
        {
            //inside a multi-byte sequence: only 10xxxxxx is acceptable
            if((c0 & 0b11000000)==0b10000000)
            {
                cpw = (uint32_t)(c0 & 0b00111111);
                cp0 = cp0 + (cpw<<((uint32_t)6*(escs-1)));
                escs--;
                if(escs==0)
                {
                    //sequence complete, push the codepoint
                    codepoints.append(cp0);
                }
            }
            else
            {
                //invalid continuation byte - this is bad UTF-8
                //drop the partial codepoint and this byte
                correct = 0;
                escs = 0;
            }
        }
        else if((c0 & 0b10000000)==0)
        {
            //normal ASCII character
            cp0 = (uint32_t)c0;
            codepoints.append(cp0);
        }
        else if((c0 & 0b11100000)==0b11000000)
        {
            //lead byte of a 2 byte sequence
            escs = 1;
            cpw = (uint32_t)(c0 & 0b00011111);
            cp0 = cpw<<((uint32_t)6);
        }
        else if((c0 & 0b11110000)==0b11100000)
        {
            //lead byte of a 3 byte sequence
            escs = 2;
            cpw = (uint32_t)(c0 & 0b00001111);
            cp0 = cpw<<((uint32_t)12);
        }
        else if((c0 & 0b11111000)==0b11110000)
        {
            //lead byte of a 4 byte sequence
            escs = 3;
            cpw = (uint32_t)(c0 & 0b00000111);
            cp0 = cpw<<((uint32_t)18);
        }
        else
        {
            //continuation byte with no lead byte, or a 11111xxx byte:
            //nothing to append, but the input is not valid UTF-8
            correct = 0;
        }
    } //for chars in string

    codepoints.shrink_to_fit();
    return correct;
}
//Pointer overload: decodes the UTF-8 bytes of *str into Unicode codepoints.
//
// str        - source string. Bytes are read until the first null byte,
//              looking at no more than str->length+1 bytes.
// codepoints - decoded values are appended here, one entry per codepoint.
//              When the null byte is reached a terminating 0 entry is appended.
//
//Returns 1 if every byte was part of a well formed sequence, 0 otherwise.
//Returns 0 without touching anything if either pointer is null.
//The following count as malformed:
//  - a byte inside a multi-byte sequence that is not a 10xxxxxx continuation byte
//  - a continuation byte or a 11111xxx byte outside of a sequence
//  - a multi-byte sequence cut short by the null terminator
//Malformed bytes and the partial sequence they interrupt are dropped, not appended.
//Overlong encodings and surrogate values are not checked for.
int string_to_uccodepoints(const amsstring *str, amsarray<uint32_t> *codepoints)
{
    long I;
    unsigned char c0;
    uint32_t cp0 = 0;   //codepoint currently being assembled
    uint32_t cpw;
    int escs = 0;       //continuation bytes still expected, 0 when outside a sequence
    int correct = 1;

    if(str==nullptr || codepoints==nullptr)
    {
        return 0;
    }

    codepoints->reserve(str->length);

    for(I=0;I<str->length+1;I++)
    {
        c0 = (unsigned char)str->cstring[I];

        if(c0=='\0')
        {
            //end of string, terminate search
            if(escs>0)
            {
                //the terminator arrived in the middle of a multi-byte sequence
                correct = 0;
            }
            cp0 = 0;
            codepoints->append(cp0);
            break;
        }

        if(escs>0)
        {
            //inside a multi-byte sequence: only 10xxxxxx is acceptable
            if((c0 & 0b11000000)==0b10000000)
            {
                cpw = (uint32_t)(c0 & 0b00111111);
                cp0 = cp0 + (cpw<<((uint32_t)6*(escs-1)));
                escs--;
                if(escs==0)
                {
                    //sequence complete, push the codepoint
                    codepoints->append(cp0);
                }
            }
            else
            {
                //invalid continuation byte - this is bad UTF-8
                //drop the partial codepoint and this byte
                correct = 0;
                escs = 0;
            }
        }
        else if((c0 & 0b10000000)==0)
        {
            //normal ASCII character
            cp0 = (uint32_t)c0;
            codepoints->append(cp0);
        }
        else if((c0 & 0b11100000)==0b11000000)
        {
            //lead byte of a 2 byte sequence
            escs = 1;
            cpw = (uint32_t)(c0 & 0b00011111);
            cp0 = cpw<<((uint32_t)6);
        }
        else if((c0 & 0b11110000)==0b11100000)
        {
            //lead byte of a 3 byte sequence
            escs = 2;
            cpw = (uint32_t)(c0 & 0b00001111);
            cp0 = cpw<<((uint32_t)12);
        }
        else if((c0 & 0b11111000)==0b11110000)
        {
            //lead byte of a 4 byte sequence
            escs = 3;
            cpw = (uint32_t)(c0 & 0b00000111);
            cp0 = cpw<<((uint32_t)18);
        }
        else
        {
            //continuation byte with no lead byte, or a 11111xxx byte:
            //nothing to append, but the input is not valid UTF-8
            correct = 0;
        }
    } //for chars in string

    codepoints->shrink_to_fit();
    return correct;
}
//Encodes an array of Unicode codepoints as UTF-8 into str.
//
// codepoints - values to encode. Encoding stops at the first 0 entry,
//              or at the end of the array if there is none.
// str        - receives the encoded bytes followed by a null terminator.
//              Its previous contents are overwritten.
//
//Values above 0x10FFFF cannot be encoded and are skipped.
//Surrogate values (0xD800-0xDFFF) are not filtered out.
//
//UTF-8 layout (21 bit values):
// 0b0xxxxxxx                            0x00000000 0x0000007F
// 0b110xxxxx 10xxxxxx                   0x00000080 0x000007FF
// 0b1110xxxx 10xxxxxx 10xxxxxx          0x00000800 0x0000FFFF
// 0b11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 0x00010000 0x0010FFFF
void uccodepoints_to_string(const amsarray<uint32_t> &codepoints, amsstring &str)
{
    long I,J;
    uint32_t cp;

    //worst case is four bytes per codepoint
    str.resize(codepoints.length*4+1);
    str.cstring[str.length]='\0'; //guard against last char not being \0

    J = 0;
    for(I=0;I<codepoints.length;I++)
    {
        cp = codepoints[I];
        if(cp==0)
        {
            //terminating codepoint, stop encoding
            break;
        }

        if(cp<=0x0000007F)
        {
            str.cstring[J] = ((ams_chartype)((unsigned char)cp));
            J++;
        }
        else if(cp<=0x000007FF)
        {
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b11000000 | (cp>>6))));
            J++;
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
            J++;
        }
        else if(cp<=0x0000FFFF)
        {
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b11100000 | (cp>>12))));
            J++;
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>6) & 0b00111111))));
            J++;
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
            J++;
        }
        else if(cp<=0x0010FFFF)
        {
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b11110000 | (cp>>18))));
            J++;
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>12) & 0b00111111))));
            J++;
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>6) & 0b00111111))));
            J++;
            str.cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
            J++;
        }
        //else: beyond the Unicode range, nothing is written
    }

    //always terminate right after the last encoded byte, even when the
    //codepoint array had no 0 entry. J is at most 4*codepoints.length,
    //which is below the index already written to above.
    str.cstring[J] = '\0';

    str.shrinktofit();
    return;
}
//Pointer overload: encodes an array of Unicode codepoints as UTF-8 into *str.
//
// codepoints - values to encode. Encoding stops at the first 0 entry,
//              or at the end of the array if there is none.
// str        - receives the encoded bytes followed by a null terminator.
//              Its previous contents are overwritten.
//
//Does nothing if either pointer is null.
//Values above 0x10FFFF cannot be encoded and are skipped.
//Surrogate values (0xD800-0xDFFF) are not filtered out.
//
//UTF-8 layout (21 bit values):
// 0b0xxxxxxx                            0x00000000 0x0000007F
// 0b110xxxxx 10xxxxxx                   0x00000080 0x000007FF
// 0b1110xxxx 10xxxxxx 10xxxxxx          0x00000800 0x0000FFFF
// 0b11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 0x00010000 0x0010FFFF
void uccodepoints_to_string(const amsarray<uint32_t> *codepoints, amsstring *str)
{
    long I,J;
    uint32_t cp;

    if(codepoints==nullptr || str==nullptr)
    {
        return;
    }

    //worst case is four bytes per codepoint
    str->resize(codepoints->length*4+1);
    str->cstring[str->length]='\0'; //guard against last char not being \0

    J = 0;
    for(I=0;I<codepoints->length;I++)
    {
        cp = codepoints->at(I);
        if(cp==0)
        {
            //terminating codepoint, stop encoding
            break;
        }

        if(cp<=0x0000007F)
        {
            str->cstring[J] = ((ams_chartype)((unsigned char)cp));
            J++;
        }
        else if(cp<=0x000007FF)
        {
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b11000000 | (cp>>6))));
            J++;
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
            J++;
        }
        else if(cp<=0x0000FFFF)
        {
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b11100000 | (cp>>12))));
            J++;
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>6) & 0b00111111))));
            J++;
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
            J++;
        }
        else if(cp<=0x0010FFFF)
        {
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b11110000 | (cp>>18))));
            J++;
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>12) & 0b00111111))));
            J++;
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | ((cp>>6) & 0b00111111))));
            J++;
            str->cstring[J] = ((ams_chartype)((unsigned char)(0b10000000 | (cp & 0b00111111))));
            J++;
        }
        //else: beyond the Unicode range, nothing is written
    }

    //always terminate right after the last encoded byte, even when the
    //codepoint array had no 0 entry. J is at most 4*codepoints->length,
    //which is below the index already written to above.
    str->cstring[J] = '\0';

    str->shrinktofit();
    return;
}
//Prints a table showing how every byte value 0..255 behaves when it is
//stored in a char and in an unsigned char and then converted back to int.
//Columns: value, char printed with %c, unsigned char printed with %c,
//char as int, unsigned char as int.
void test_unicode_ascii_int_conv()
{
    int I;
    char c;
    unsigned char uc;
    int J1,J2;

    //256 iterations so that the last byte value, 255, is included
    for(I=0;I<256;I++)
    {
        c = (char)I;
        uc = (unsigned char) c;
        J1 = (int)c;
        J2 = (int)uc;
        printf("%d\t%c\t%c\t%d\t%d\n",I,c,uc,J1,J2);
    }
    return;
}
static void test_unicode_conv1_sub(uint32_t codept)
{
long I;
amsarray<uint32_t> codepts1;
amsarray<uint32_t> codepts2;
amsstring s1,s2;
codepts1.resize(1);
codepts1[0] = codept;
uccodepoints_to_string(codepts1,s1);
printf("UCC: "); _intl_print_ui32bits(codepts1[0]); printf("\n");
printf("STR: ");
for(I=0;I<s1.length;I++)
{
_intl_print_ui8bits((uint8_t)(unsigned char)s1.cstring[I]);
printf(" ");
}
printf("\n");
string_to_uccodepoints(s1,codepts2);
printf("UCC: "); _intl_print_ui32bits(codepts2[0]); printf("\n");
uccodepoints_to_string(codepts2,s2);
printf("STR: ");
for(I=0;I<s2.length;I++)
{
_intl_print_ui8bits(s2.cstring[I]);
printf(" ");
}
printf("\n");
if(s1==s2)
{
printf("STR: PASS\t");
}
else
{
printf("STR: FAIL\t");
}
if(codepts1[0]==codepts2[0])
{
printf("UCC: PASS\n");
}
else
{
printf("UCC: FAIL\n");
}
//printf("Can I print the char to terminal?: %s\n",s1.cstring);
return;
}
//Runs the single codepoint round trip test on the bounding cases:
//the largest value of each UTF-8 encoding length (1 to 4 bytes),
//followed by one value above 0x10FFFF.
void test_unicode_conv1()
{
    static const uint32_t bounding_cases[] =
    {
        0x7F,
        0x07FF,
        0xFFFF,
        0x0010FFFF,
        0x07FFFFFF
    };

    for(const uint32_t codept : bounding_cases)
    {
        test_unicode_conv1_sub(codept);
        printf("\n\n");
    }
}
//Round trips the codepoint array *cp through the pointer overloads:
//encode, decode, encode again.
//Returns 1 when both encoded strings are equal and the decoded array equals *cp.
//Otherwise prints "FAIL: " and returns 0.
static int test_unicode_conv2_sub(amsarray<uint32_t> *cp)
{
    amsstring encoded,reencoded;
    amsarray<uint32_t> decoded;

    uccodepoints_to_string(cp,&encoded);
    string_to_uccodepoints(&encoded,&decoded);
    uccodepoints_to_string(&decoded,&reencoded);

    if(!(encoded==reencoded && *cp==decoded))
    {
        printf("FAIL: \n");
        return 0;
    }
    return 1;
}
//Resizes *cp to len+1 entries, fills the first len with values obtained from
//ams::randi(1,0x0010FFFF), and stores a terminating 0 in the last entry.
static void test_gen_rand_codepts(int len, amsarray<uint32_t> *cp)
{
    long idx = 0;

    cp->resize(len+1);
    while(idx<len)
    {
        cp->at(idx) = ams::randi(1,0x0010FFFF);
        idx++;
    }

    //terminating codepoint
    cp->at(len) = 0;
}
//Randomised round trip test of the codepoint <-> UTF-8 conversions.
//Runs ntests trials; each trial generates len random codepoints and passes
//them to test_unicode_conv2_sub. The first generated array is printed as an
//example, progress is printed every ntests/10 trials, and a summary of
//passes and failures is printed at the end.
void test_unicode_conv2()
{
    long I,J;
    int pass;
    amsarray<uint32_t> cp;
    long ntests = 10000;
    long passes = 0;
    long failures = 0;
    int len = 30;

    printf("Testing unicode to string conversion.\n");
    printf("%ld tests of %d random codepoints each.\n",ntests,len);

    for(I=0;I<ntests;I++)
    {
        //generate as many codepoints as the banner above announces
        test_gen_rand_codepts(len,&cp);

        if(I==0)
        {
            printf("ex cp string:");
            for(J=0;J<cp.length;J++)
            {
                //codepoints are unsigned 32 bit values
                printf("%u,",(unsigned int)cp[J]);
            }
            printf("\n");
        }

        pass = test_unicode_conv2_sub(&cp);
        if(pass==1)
        {
            passes++;
        }
        else
        {
            failures++;
        }

        if(I%(ntests/10)==0)
        {
            printf("Test %ld....\n",I);
        }
    }

    printf("%ld tests, %ld passes, %ld failures.\n",ntests,passes,failures);
    return;
}
};

@ -4,5 +4,6 @@ int main(int argc, char* argv[])
{ {
int ret = 0; int ret = 0;
printf("ams string4 library tests.\n"); printf("ams string4 library tests.\n");
return ret; return ret;
} }
Loading…
Cancel
Save