27#ifndef MYGUI_U_STRING_H_
28#define MYGUI_U_STRING_H_
76#ifdef __STDC_ISO_10646__
80 #if defined(__WIN32__) || defined(_WIN32)
83 #if WCHAR_MAX <= 0xFFFF
92#if MYGUI_COMPILER == MYGUI_COMPILER_MSVC
98 #if defined(_NATIVE_WCHAR_T_DEFINED)
99 #define MYGUI_IS_NATIVE_WCHAR_T 1
101 #define MYGUI_IS_NATIVE_WCHAR_T 0
106 #define MYGUI_IS_NATIVE_WCHAR_T 1
111#if MYGUI_COMPILER == MYGUI_COMPILER_MSVC
113 #pragma warning(push)
114 #pragma warning(disable : 4275)
146 static const unsigned char _lead1 = 0xC0;
147 static const unsigned char _lead1_mask = 0x1F;
148 static const unsigned char _lead2 = 0xE0;
149 static const unsigned char _lead2_mask = 0x0F;
150 static const unsigned char _lead3 = 0xF0;
151 static const unsigned char _lead3_mask = 0x07;
152 static const unsigned char _lead4 = 0xF8;
153 static const unsigned char _lead4_mask = 0x03;
154 static const unsigned char _lead5 = 0xFC;
155 static const unsigned char _lead5_mask = 0x01;
156 static const unsigned char _cont = 0x80;
157 static const unsigned char _cont_mask = 0x3F;
174 using dstring = std::basic_string<code_point>;
185 std::runtime_error(_message)
221 class _const_fwd_iterator;
427#if MYGUI_IS_NATIVE_WCHAR_T
434 UString(
const std::wstring& wstr);
440 UString(
const std::string& str);
444 template<
size_type num>
463 size_type size()
const;
465 size_type length()
const;
468 size_type length_Characters()
const;
470 size_type max_size()
const;
472 void reserve(size_type size);
474 void resize(size_type num,
const code_point& val = 0);
480 const code_point* c_str()
const;
482 const code_point* data()
const;
484 size_type capacity()
const;
489 UString substr(size_type index, size_type num = npos)
const;
491 void push_back(unicode_char val);
492#if MYGUI_IS_NATIVE_WCHAR_T
512 const std::string&
asUTF8()
const;
520 const std::wstring&
asWStr()
const;
591#if MYGUI_IS_NATIVE_WCHAR_T
600 return assign(str.data(), str.size());
603 UString& assign(
const utf32string& str);
605 UString& assign(
const char* c_str, size_type num);
615 UString& append(
const code_point* str);
619 UString& append(
const code_point* str, size_type num);
621 UString& append(size_type num, code_point ch);
623 UString& append(iterator start, iterator end);
624#if MYGUI_IS_NATIVE_WCHAR_T
653 UString& insert(size_type index1,
const UString& str, size_type index2, size_type num);
655 void insert(iterator i, iterator start, iterator end);
657 UString& insert(size_type index,
const code_point* str, size_type num);
658#if MYGUI_IS_NATIVE_WCHAR_T
666#if MYGUI_IS_NATIVE_WCHAR_T
676#if MYGUI_IS_NATIVE_WCHAR_T
730#if MYGUI_IS_NATIVE_WCHAR_T
751#if MYGUI_IS_NATIVE_WCHAR_T
762#if MYGUI_IS_NATIVE_WCHAR_T
777#if MYGUI_IS_NATIVE_WCHAR_T
785#if MYGUI_IS_NATIVE_WCHAR_T
803#if MYGUI_IS_NATIVE_WCHAR_T
816#if MYGUI_IS_NATIVE_WCHAR_T
832#if MYGUI_IS_NATIVE_WCHAR_T
845#if MYGUI_IS_NATIVE_WCHAR_T
875#if MYGUI_IS_NATIVE_WCHAR_T
892 operator std::string()
const;
894 operator std::wstring()
const;
896 operator std::string_view()
const
907 static bool _utf16_independent_char(code_point cp);
909 static bool _utf16_surrogate_lead(code_point cp);
911 static bool _utf16_surrogate_follow(code_point cp);
913 static size_t _utf16_char_length(code_point cp);
915 static size_t _utf16_char_length(unicode_char uc);
921 static size_t _utf16_to_utf32(
const code_point in_cp[2], unicode_char& out_uc);
928 static size_t _utf32_to_utf16(
const unicode_char& in_uc, code_point out_cp[2]);
936 static bool _utf8_start_char(
unsigned char cp);
938 static size_t _utf8_char_length(
unsigned char cp);
940 static size_t _utf8_char_length(unicode_char uc);
943 static size_t _utf8_to_utf32(
const unsigned char in_cp[6], unicode_char& out_uc);
945 static size_t _utf32_to_utf8(
const unicode_char& in_uc,
unsigned char out_cp[6]);
948 static size_type _verifyUTF8(
const unsigned char* c_str);
955 static size_type _verifyUTF8(
const char* c_str, size_type num);
976 void _cleanBuffer()
const;
979 void _getBufferStr()
const;
981 void _getBufferWStr()
const;
983 void _getBufferUTF32Str()
const;
985 void _load_buffer_UTF8()
const;
986 void _load_buffer_WStr()
const;
987 void _load_buffer_UTF32()
const;
989 mutable BufferType m_bufferType;
990 mutable size_t m_bufferSize;
1005 return UString(s1).append(s2);
1010 return UString(s1).append(1, c);
1015 return UString(s1).append(1, c);
1020 return UString(s1).append(1, c);
1022#if MYGUI_IS_NATIVE_WCHAR_T
1026 return UString(s1).append(1, c);
1032 return UString().append(1, c).append(s2);
1037 return UString().append(1, c).append(s2);
1042 return UString().append(1, c).append(s2);
1044#if MYGUI_IS_NATIVE_WCHAR_T
1048 return UString().append(1, c).append(s2);
1129#if MYGUI_COMPILER == MYGUI_COMPILER_MSVC
1130 #pragma warning(pop)
base iterator class for UString
ptrdiff_t difference_type
int _setCharacter(unicode_char uc)
size_type _get_index() const
void _become(const _base_iterator &i)
void _seekRev(size_type c)
void _jump_to(size_type index)
void _seekFwd(size_type c)
unicode_char _getCharacter() const
const forward iterator for UString
friend bool operator<=(const _const_fwd_iterator &left, const _const_fwd_iterator &right)
less than or equal
friend bool operator>=(const _const_fwd_iterator &left, const _const_fwd_iterator &right)
greater than or equal
friend bool operator!=(const _const_fwd_iterator &left, const _const_fwd_iterator &right)
inequality operator
friend bool operator<(const _const_fwd_iterator &left, const _const_fwd_iterator &right)
less than
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_fwd_iterator & operator++()
pre-increment
_const_fwd_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_fwd_iterator & operator--()
pre-decrement
friend bool operator==(const _const_fwd_iterator &left, const _const_fwd_iterator &right)
equality operator
_const_fwd_iterator operator+(difference_type n)
addition operator
friend bool operator>(const _const_fwd_iterator &left, const _const_fwd_iterator &right)
greater than
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
const value_type & operator*() const
dereference operator
const reverse iterator for UString
_const_rev_iterator operator-(difference_type n)
subtraction operator
_const_rev_iterator operator+(difference_type n)
addition operator
friend bool operator!=(const _const_rev_iterator &left, const _const_rev_iterator &right)
inequality operator
_const_rev_iterator & operator++()
pre-increment
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
friend bool operator>=(const _const_rev_iterator &left, const _const_rev_iterator &right)
greater than or equal
friend bool operator<=(const _const_rev_iterator &left, const _const_rev_iterator &right)
less than or equal
friend bool operator==(const _const_rev_iterator &left, const _const_rev_iterator &right)
equality operator
const value_type & operator[](difference_type n) const
dereference at offset operator
friend bool operator<(const _const_rev_iterator &left, const _const_rev_iterator &right)
less than
friend bool operator>(const _const_rev_iterator &left, const _const_rev_iterator &right)
greater than
_const_rev_iterator & operator--()
pre-decrement
const value_type & operator*() const
dereference operator
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
forward iterator for UString
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
_fwd_iterator & operator++()
pre-increment
_fwd_iterator operator-(difference_type n)
subtraction operator
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator operator+(difference_type n)
addition operator
value_type & operator*() const
dereference operator
_fwd_iterator & operator--()
pre-decrement
value_type & operator[](difference_type n) const
dereference at offset operator
reverse iterator for UString
_rev_iterator & operator+=(difference_type n)
addition assignment operator
_rev_iterator & operator--()
pre-decrement
value_type & operator*() const
dereference operator
_rev_iterator & operator++()
pre-increment
_rev_iterator operator-(difference_type n)
subtraction operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator[](difference_type n) const
dereference at offset operator
_rev_iterator operator+(difference_type n)
addition operator
invalid_data(const std::string &_message)
constructor takes a string message that can be later retrieved by the what() function
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
UString operator+(UString::code_point c, const UString &s2)
string addition operator
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
size_type length() const
Returns the number of code points in the current string.
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
bool operator>(const UString &right) const
greater than operator
size_type find(wchar_t ch, size_type index=0) const
returns the index of the first occurrence of ch within the current string, starting at index; returns US...
size_type size() const
Returns the number of code points in the current string.
const code_point * data() const
returns a pointer to the first character in the current string
UString()
default constructor, creates an empty string
int compare(size_type index, size_type length, const wchar_t *w_str, size_type length2) const
compare a substring of w_str to a substring of the current string, where the substring of w_str begins at...
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
static size_type _verifyUTF8(std::string_view str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
bool operator==(const UString &right) const
equality operator
bool operator!=(const UString &right) const
inequality operator
UString & assign(const wchar_t *w_str)
assign w_str to the current string
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
UString & assign(std::string_view str)
assign str to the current string (str is treated as a UTF-8 stream)
_const_rev_iterator const_reverse_iterator
const reverse iterator
UString & assign(const wchar_t *w_str, size_type num)
assign the first num characters of w_str to the current string
UString & insert(size_type index, const code_point *str)
inserts str into the current string, at location index
UString operator+(const UString &s1, UString::unicode_char c)
string addition operator
std::wstring * mWStrBuffer
UString operator+(wchar_t c, const UString &s2)
string addition operator
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
UString & insert(size_type index, size_type num, wchar_t ch)
inserts num copies of ch into the current string, at location index
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
size_type find_last_of(char ch, size_type index=npos) const
returns the index of the first occurrence of ch in the current string, doing a reverse search from in...
int compare(const UString &str) const
compare str to the current string
code_point value_type
value type typedef for use in iterators
bool operator<=(const UString &right) const
less than or equal operator
size_type find_first_of(wchar_t ch, size_type index=0) const
returns the index of the first occurrence of ch in the current string, starting the search at index; ...
size_type rfind(wchar_t ch, size_type index=0) const
returns the location of the first occurrence of ch in the current string, doing a reverse search from...
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
UString operator+(UString::unicode_char c, const UString &s2)
string addition operator
std::ostream & operator<<(std::ostream &os, const UString &s)
std::ostream write operator
UString(std::string_view str)
UString & append(size_type num, wchar_t ch)
appends num repetitions of ch on to the end of the current string
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
size_type rfind(const wchar_t *w_str, size_type index, size_type num) const
returns the location of the first occurrence of w_str in the current string, doing a reverse search fro...
UString operator+(const UString &s1, char c)
string addition operator
UString operator+(const UString &s1, const UString &s2)
string addition operator
size_type find_first_not_of(wchar_t ch, size_type index=0) const
returns the index of the first character within the current string that does not match ch,...
UString & insert(size_type index, const wchar_t *w_str, size_type num)
inserts num code points of w_str into the current string, at location index
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
std::basic_string< code_point > dstring
bool operator<(const UString &right) const
less than operator
UString(const wchar_t *w_str, size_type length)
duplicate of w_str, length characters long
uint16 code_point
a single UTF-16 code point
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the first character within the current string that matches any character in str,...
iterator end()
returns an iterator just past the end of the string
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
static const size_type npos
the usual constant representing: not found, no limit, etc
uint32 unicode_char
a single 32-bit Unicode character
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
void insert(iterator i, size_type num, const wchar_t &ch)
inserts num copies of ch into the current string, before the code point denoted by i
_fwd_iterator iterator
iterator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
utf32string * mUTF32StrBuffer
UString & operator=(wchar_t ch)
assignment operator
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
code_point & operator[](size_type index)
code point dereference operator
size_type find_last_of(wchar_t ch, size_type index=npos) const
returns the index of the first occurrence of ch in the current string, doing a reverse search from in...
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
void push_back(wchar_t val)
appends val to the end of the string
size_type find(const wchar_t *w_str, size_type index, size_type length) const
returns the index of the first occurrence of w_str within the current string and within length code poi...
UString & append(const UString &str)
appends str on to the end of the current string
const code_point * c_str() const
returns a pointer to the first character in the current string
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
_const_fwd_iterator const_iterator
const iterator
UString & append(const wchar_t *w_str, size_type num)
appends num characters of w_str on to the end of the current string
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
size_t size_type
size type used to indicate string size and character positions within the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
void push_back(unicode_char val)
appends val to the end of the string
UString operator+(const UString &s1, UString::code_point c)
string addition operator
size_type find_last_not_of(wchar_t ch, size_type index=npos) const
returns the index of the last occurrence of a character that does not match ch in the current string,...
UString(const char(&str)[num])
UString operator+(const UString &s1, wchar_t c)
string addition operator
std::wostream & operator<<(std::wostream &os, const UString &s)
std::wostream write operator
iterator begin()
returns an iterator to the first element of the string
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
UString operator+(char c, const UString &s2)
string addition operator
_rev_iterator reverse_iterator
reverse iterator
UString(const wchar_t *w_str)
duplicate of nul-terminated wchar_t array
UString::size_type operator-(const UString::_const_fwd_iterator &left, const UString::_const_fwd_iterator &right)
bool operator<=(const UString::_const_fwd_iterator &left, const UString::_const_fwd_iterator &right)
bool operator==(const UString::_const_fwd_iterator &left, const UString::_const_fwd_iterator &right)
bool operator>=(const UString::_const_fwd_iterator &left, const UString::_const_fwd_iterator &right)
bool operator>(const UString::_const_fwd_iterator &left, const UString::_const_fwd_iterator &right)
bool operator!=(const UString::_const_fwd_iterator &left, const UString::_const_fwd_iterator &right)
float len(float x, float y)
bool operator<(const UString::_const_fwd_iterator &left, const UString::_const_fwd_iterator &right)