ASPiK SDK
cstring.h
1 // This file is part of VSTGUI. It is subject to the license terms
2 // in the LICENSE file found in the top-level directory of this
3 // distribution and at http://github.com/steinbergmedia/vstgui/LICENSE
4 
5 #ifndef __cstring__
6 #define __cstring__
7 
#include "vstguifwd.h"
#include "optional.h"
#include "platform/iplatformstring.h"
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstring>
#include <functional>
#include <iterator>
#include <sstream>
#include <string>
16 
17 namespace VSTGUI {
18 
19 //-----------------------------------------------------------------------------
/** Bidirectional iterator adaptor that steps over UTF-8 encoded bytes one code point at a time.
 *
 *	BaseIterator iterates the underlying bytes. Dereferencing decodes the sequence that starts at
 *	the current byte and yields it as a char32_t value. The operators do not validate the byte
 *	sequence and do not check range boundaries.
 */
template<typename BaseIterator>
class UTF8CodePointIterator
{
public:
	// Iterator traits as member types (same types std::iterator<std::bidirectional_iterator_tag,
	// char32_t> provided); deriving from std::iterator is deprecated since C++17.
	using iterator_category = std::bidirectional_iterator_tag;
	using value_type = char32_t;
	using difference_type = std::ptrdiff_t;
	using pointer = value_type*;
	using reference = value_type&;

	using CodePoint = value_type;

	UTF8CodePointIterator () noexcept = default;
	UTF8CodePointIterator (const UTF8CodePointIterator& o) noexcept : it (o.it) {}
	/** Wrap a byte iterator that points at the first byte of a code point. */
	explicit UTF8CodePointIterator (const BaseIterator& iterator) noexcept : it (iterator) {}

	UTF8CodePointIterator& operator++ () noexcept;
	UTF8CodePointIterator& operator-- () noexcept;
	UTF8CodePointIterator operator++ (int) noexcept;
	UTF8CodePointIterator operator-- (int) noexcept;

	bool operator== (const UTF8CodePointIterator& other) const noexcept;
	bool operator!= (const UTF8CodePointIterator& other) const noexcept;

	/** Decode and return the code point at the current position. */
	CodePoint operator* () const noexcept;

	/** Return a copy of the wrapped byte iterator. */
	BaseIterator base () const noexcept { return it; }
private:
	BaseIterator it;

	// Masks for the individual bits of a UTF-8 byte, counted from the most significant bit.
	static constexpr uint8_t kFirstBitMask = 128u; // 10000000
	static constexpr uint8_t kSecondBitMask = 64u; // 01000000
	static constexpr uint8_t kThirdBitMask = 32u; // 00100000
	static constexpr uint8_t kFourthBitMask = 16u; // 00010000
	static constexpr uint8_t kFifthBitMask = 8u; // 00001000
};
50 
55 //-----------------------------------------------------------------------------
57 {
58 public:
 // Underlying storage type (std::string) and its size type.
59  using StringType = std::string;
60  using SizeType = StringType::size_type;
62 
 // Construction from a C-string pointer (defaults to nullptr), by copy, from a std::string
 // (copying is explicit, moving is implicit) or by move.
63  UTF8String (UTF8StringPtr str = nullptr);
64  UTF8String (const UTF8String& other);
65  explicit UTF8String (const StringType& str);
66  UTF8String (UTF8String&& other) noexcept;
67  UTF8String (StringType&& str) noexcept;
68 
 // Assignment counterparts of the constructors; assigning a C-string pointer forwards to assign ().
69  UTF8String& operator= (const UTF8String& other);
70  UTF8String& operator= (const StringType& other);
71  UTF8String& operator= (UTF8String&& other) noexcept;
72  UTF8String& operator= (StringType&& str) noexcept;
73  UTF8String& operator= (UTF8StringPtr str) { assign (str); return *this; }
74 
 // Forwarded to std::string: length () is the number of bytes stored, not the number of code points.
75  SizeType length () const noexcept { return string.length (); }
76  bool empty () const noexcept { return string.empty (); }
77 
 // Copies into a caller supplied buffer of dstSize; defined elsewhere.
 // NOTE(review): truncation / termination behaviour is not visible here - confirm in the implementation.
78  void copy (UTF8StringBuffer dst, SizeType dstSize) const noexcept;
79 
 // Iteration by code point.
 // NOTE(review): the CodePointIterator alias is not declared in the lines shown here - confirm it is part of this class.
80  CodePointIterator begin () const noexcept;
81  CodePointIterator end () const noexcept;
82 
 // Comparison with C-string pointers, other UTF8String objects and std::string.
83  bool operator== (UTF8StringPtr str) const noexcept;
84  bool operator!= (UTF8StringPtr str) const noexcept;
85  bool operator== (const UTF8String& str) const noexcept;
86  bool operator!= (const UTF8String& str) const noexcept;
87  bool operator== (const StringType& str) const noexcept;
88  bool operator!= (const StringType& str) const noexcept;
89 
 // Concatenation. NOTE(review): operator+ is declared non-const although it returns a new object.
90  UTF8String& operator+= (const UTF8String& other);
91  UTF8String operator+ (const UTF8String& other);
92 
93  void assign (UTF8StringPtr str);
94  void clear () noexcept;
95 
 // data () and the implicit pointer conversion both return std::string::data () of the stored string.
96  const UTF8StringPtr data () const noexcept { return string.data (); }
97  operator const UTF8StringPtr () const noexcept { return data (); }
98  const StringType& getString () const noexcept { return string; }
 // Platform representation of the string; defined elsewhere.
99  IPlatformString* getPlatformString () const noexcept;
100 
 // Deleted: a UTF8String cannot be used in a boolean context.
 // NOTE(review): presumably guards against `if (str)` silently using the pointer conversion above.
101  explicit operator bool () const = delete;
102 //-----------------------------------------------------------------------------
103 private:
 // The owned UTF-8 bytes.
104  StringType string;
 // Mutable so that const members can set it.
 // NOTE(review): presumably a lazily created cache used by getPlatformString () - confirm.
105  mutable SharedPointer<IPlatformString> platformString;
106 };
107 
108 inline bool operator== (const UTF8String::StringType& lhs, const UTF8String& rhs) noexcept { return lhs == rhs.getString (); }
109 inline bool operator!= (const UTF8String::StringType& lhs, const UTF8String& rhs) noexcept { return lhs != rhs.getString (); }
110 
111 inline UTF8String operator+ (UTF8StringPtr lhs, const UTF8String& rhs) { return UTF8String (lhs) += rhs; }
112 
113 //-----------------------------------------------------------------------------
114 template<typename T>
115 inline UTF8String toString (const T& value)
116 {
117  return UTF8String (std::to_string (value));
118 }
119 
120 //-----------------------------------------------------------------------------
// Declaration only; used as the default character test of TrimOptions.
// NOTE(review): presumably returns true for white-space code points - confirm against the definition.
125 bool isSpace (char32_t character) noexcept;
126 
127 //-----------------------------------------------------------------------------
129 {
 // Predicate deciding whether a code point matches the trim test.
130  using CharTestFunc = std::function<bool (char32_t)>;
 // The default test forwards to isSpace (); no side is selected until left () / right () is called.
131  TrimOptions (CharTestFunc&& func = [] (char32_t c) { return isSpace (c); }) : test (std::move (func)) {}
132 
 // Select the left / right side; both return *this so the calls can be chained.
133  TrimOptions& left () { setBit (flags, Flags::kLeft, true); return *this; }
134  TrimOptions& right () { setBit (flags, Flags::kRight, true); return *this; }
135 
 // Query which sides were selected.
136  bool trimLeft () const { return hasBit (flags, Flags::kLeft); }
137  bool trimRight () const { return hasBit (flags, Flags::kRight); }
138 
 // Negated test: true for a code point the test function rejects.
139  bool operator() (char32_t c) const { return !test (c); }
140 
141 private:
 // Bit values stored in flags.
142  enum Flags : uint8_t {
143  kLeft = 1 << 0,
144  kRight = 1 << 1
145  };
146  uint8_t flags {0};
147  CharTestFunc test;
148 };
149 
150 //-----------------------------------------------------------------------------
// Declaration only; returns a new UTF8String. The default options select both the left and the right side.
// NOTE(review): presumably removes the code points matched by the options' test from the selected sides - confirm.
151 UTF8String trim (const UTF8String& str, TrimOptions options = TrimOptions ().left ().right ());
152 
// Free-function string helpers, only declared when VSTGUI_ENABLE_DEPRECATED_METHODS evaluates to true.
153 #if VSTGUI_ENABLE_DEPRECATED_METHODS
154 //-----------------------------------------------------------------------------
155 namespace String {
 // NOTE(review): presumably allocates a copy of string that must be released with free () - confirm.
157  UTF8StringBuffer newWithString (UTF8StringPtr string);
 // NOTE(review): presumably releases a buffer obtained from newWithString () - confirm.
159  void free (UTF8StringBuffer buffer);
160 }
161 #endif
162 
163 //-----------------------------------------------------------------------------
171 //-----------------------------------------------------------------------------
173 {
174 public:
 // Empty view: null pointer with a byte count of 0.
175  UTF8StringView () : str (nullptr), byteCount (0) {}
 // Stores the pointer only; the byte count is left unset and computed on demand by calculateByteCount ().
176  UTF8StringView (const UTF8StringPtr string) : str (string) {}
 // Store the data pointer of the given string (no bytes are copied) and a byte count of length + 1.
177  UTF8StringView (const UTF8String& string) : str (string.data ()), byteCount (string.length () + 1) {}
178  UTF8StringView (const std::string& string) : str (string.data ()), byteCount (string.size () + 1) {}
179 
180  UTF8StringView (const UTF8StringView& other) noexcept;
181  UTF8StringView& operator= (const UTF8StringView& other) noexcept;
182  UTF8StringView (UTF8StringView&& other) noexcept = default;
183  UTF8StringView& operator= (UTF8StringView&& other) = default;
184 
 // Number of bytes including the null terminator (0 for a null pointer); the result is cached.
186  size_t calculateByteCount () const;
187 
 // Number of characters, counted by stepping a UTF8CharacterIterator over the string.
189  size_t calculateCharacterCount () const;
190 
 // True if subString occurs in this string; with ignoreCase the bytes are compared via std::toupper.
192  bool contains (const UTF8StringPtr subString, bool ignoreCase = false) const;
193 
 // True if this string begins with startString; false if either pointer is null.
195  bool startsWith (const UTF8StringView& startString) const;
196 
 // True if this string ends with endString.
198  bool endsWith (const UTF8StringView& endString) const;
199 
 // Parse the string as a double through an istringstream imbued with the classic locale.
201  double toDouble (uint32_t precision = 8) const;
202 
 // toDouble () converted to float.
204  float toFloat (uint32_t precision = 8) const;
205 
 // toNumber<int64_t> (), or 0 when no number could be extracted.
207  int64_t toInteger () const;
208 
 // Parse the string as an arithmetic type T; the Optional is empty when stream extraction fails.
209  template<typename T>
210  Optional<T> toNumber () const;
211 
 // Content comparison; two views with identical pointers (including two null pointers) are equal.
212  bool operator== (const UTF8StringPtr otherString) const;
213  bool operator!= (const UTF8StringPtr otherString) const;
214  bool operator== (UTF8StringView otherString) const;
 // Implicit conversion to the viewed pointer.
215  operator const UTF8StringPtr () const;
216 //-----------------------------------------------------------------------------
217 private:
 // Viewed string; the view stores only the pointer.
218  UTF8StringPtr str;
 // Cached byte count including the terminator; mutable so calculateByteCount () const can fill it.
219  mutable Optional<size_t> byteCount;
220 };
221 
222 //-----------------------------------------------------------------------------
224 {
225 public:
226  UTF8CharacterIterator (const UTF8StringPtr utf8Str)
227  : startPos ((uint8_t*)utf8Str)
228  , currentPos (0)
229  , strLen (std::strlen (utf8Str))
230  {
231  begin ();
232  }
233 
234  UTF8CharacterIterator (const UTF8StringPtr utf8Str, size_t strLen)
235  : startPos ((uint8_t*)utf8Str)
236  , currentPos (0)
237  , strLen (strLen)
238  {
239  begin ();
240  }
241 
242  UTF8CharacterIterator (const std::string& stdStr)
243  : startPos ((uint8_t*)stdStr.c_str ())
244  , currentPos (0)
245  , strLen (stdStr.size ())
246  {
247  begin ();
248  }
249 
250  uint8_t* next ()
251  {
252  if (currentPos)
253  {
254  if (currentPos == back ())
255  {}
256  else if (*currentPos <= 0x7F) // simple ASCII character
257  currentPos++;
258  else
259  {
260  uint8_t characterLength = getByteLength ();
261  if (characterLength)
262  currentPos += characterLength;
263  else
264  currentPos = end (); // error, not an allowed UTF-8 character at this position
265  }
266  }
267  return currentPos;
268  }
269 
270  uint8_t* previous ()
271  {
272  while (currentPos)
273  {
274  --currentPos;
275  if (currentPos < front ())
276  {
277  currentPos = begin ();
278  break;
279  }
280  else
281  {
282  if (*currentPos <= 0x7f || (*currentPos >= 0xC0 && *currentPos <= 0xFD))
283  break;
284  }
285  }
286  return currentPos;
287  }
288 
289  uint8_t getByteLength () const
290  {
291  if (currentPos && currentPos != back ())
292  {
293  if (*currentPos <= 0x7F)
294  return 1;
295  else
296  {
297  if (*currentPos >= 0xC0 && *currentPos <= 0xFD)
298  {
299  if ((*currentPos & 0xF8) == 0xF8)
300  return 5;
301  else if ((*currentPos & 0xF0) == 0xF0)
302  return 4;
303  else if ((*currentPos & 0xE0) == 0xE0)
304  return 3;
305  else if ((*currentPos & 0xC0) == 0xC0)
306  return 2;
307  }
308  }
309  }
310  return 0;
311  }
312 
313  uint8_t* begin () { currentPos = startPos; return currentPos;}
314  uint8_t* end () { currentPos = startPos + strLen; return currentPos; }
315 
316  const uint8_t* front () const { return startPos; }
317  const uint8_t* back () const { return startPos + strLen; }
318 
319  const uint8_t* operator++() { return next (); }
320  const uint8_t* operator--() { return previous (); }
321  bool operator==(uint8_t i) { if (currentPos) return *currentPos == i; return false; }
322  operator uint8_t* () const { return (uint8_t*)currentPos; }
323 
324 protected:
 // First byte of the iterated string, initialised by the constructors.
325  uint8_t* startPos;
 // Cursor; moved by begin (), end (), next () and previous ().
326  uint8_t* currentPos;
 // Length of the string in bytes; back () is startPos + strLen.
327  size_t strLen;
328 };
329 
330 //-----------------------------------------------------------------------------
331 //-----------------------------------------------------------------------------
332 //-----------------------------------------------------------------------------
333 inline UTF8StringView::UTF8StringView (const UTF8StringView& other) noexcept
334 {
335  *this = other;
336 }
337 
338 //------------------------------------------------------------------------
339 inline UTF8StringView& UTF8StringView::operator= (const UTF8StringView& other) noexcept
340 {
341  str = other.str;
342  if (other.byteCount)
343  byteCount = makeOptional (*other.byteCount);
344  return *this;
345 }
346 
347 //------------------------------------------------------------------------
349 {
 // NOTE(review): the signature line of this body is not present above it; the class declares
 // `size_t calculateCharacterCount () const`.
 // Counts characters by stepping a UTF8CharacterIterator from the start of str until it reaches back ().
350  size_t count = 0;
 // a null view has no characters
351  if (str == 0)
352  return count;
353 
354  UTF8CharacterIterator it (str);
 // `it` converts to its current position, which is compared with the end position
355  while (it != it.back ())
356  {
357  count++;
358  ++it;
359  }
360  return count;
361 }
362 
363 //-----------------------------------------------------------------------------
365 {
 // NOTE(review): the signature line of this body is not present above it; the class declares
 // `size_t calculateByteCount () const`.
 // Computes the byte count once and caches it: strlen + 1 (terminator included), or 0 for a null pointer.
366  if (!byteCount)
367  byteCount = makeOptional<size_t> (str ? std::strlen (str) + 1 : 0);
368  return *byteCount;
369 }
370 
371 //-----------------------------------------------------------------------------
372 inline bool UTF8StringView::contains (const UTF8StringPtr subString, bool ignoreCase) const
373 {
374  if (ignoreCase)
375  {
376  if (!str || !subString)
377  return false;
378  UTF8CharacterIterator subIt (subString);
380  auto foundIt = std::search (
381  it.begin (), it.end (), subIt.begin (), subIt.end (),
382  [] (uint8_t c1, uint8_t c2) { return std::toupper (c1) == std::toupper (c2); });
383  return foundIt != it.end ();
384  }
385  return (!str || !subString || std::strstr (str, subString) == nullptr) ? false : true;
386 }
387 
388 //-----------------------------------------------------------------------------
389 inline bool UTF8StringView::startsWith (const UTF8StringView& startString) const
390 {
391  if (!str || !startString.str)
392  return false;
393  size_t startStringLen = startString.calculateByteCount ();
394  size_t thisLen = calculateByteCount ();
395  if (startStringLen > thisLen)
396  return false;
397  return std::strncmp (str, startString.str, startStringLen - 1) == 0;
398 }
399 
400 //-----------------------------------------------------------------------------
401 inline bool UTF8StringView::endsWith (const UTF8StringView& endString) const
402 {
403  size_t endStringLen = endString.calculateByteCount ();
404  size_t thisLen = calculateByteCount ();
405  if (endStringLen > thisLen)
406  return false;
407  return endString == UTF8StringView (str + (thisLen - endStringLen));
408 }
409 
410 //-----------------------------------------------------------------------------
411 inline double UTF8StringView::toDouble (uint32_t precision) const
412 {
413  std::istringstream sstream (str);
414  sstream.imbue (std::locale::classic ());
415  sstream.precision (static_cast<std::streamsize> (precision));
416  double result;
417  sstream >> result;
418  return result;
419 }
420 
421 //-----------------------------------------------------------------------------
422 inline float UTF8StringView::toFloat (uint32_t precision) const
423 {
424  return static_cast<float>(toDouble (precision));
425 }
426 
427 //------------------------------------------------------------------------
428 inline int64_t UTF8StringView::toInteger () const
429 {
430  if (auto number = toNumber<int64_t> ())
431  return *number;
432  return 0;
433 }
434 
435 //------------------------------------------------------------------------
436 template<typename T>
437 inline Optional<T> UTF8StringView::toNumber () const
438 {
439  static_assert (std::is_arithmetic<T>::value, "only arithmetic types allowed");
440  std::istringstream sstream (str);
441  sstream.imbue (std::locale::classic ());
442  T number {};
443  sstream >> number;
444  if (!sstream.fail ())
445  return makeOptional (number);
446  return {};
447 }
448 
449 //-----------------------------------------------------------------------------
450 inline bool UTF8StringView::operator== (const UTF8StringPtr otherString) const
451 {
452  if (str == otherString) return true;
453  return (str && otherString) ? (std::strcmp (str, otherString) == 0) : false;
454 }
455 
456 //-----------------------------------------------------------------------------
457 inline bool UTF8StringView::operator!= (const UTF8StringPtr otherString) const
458 {
459  return !(*this == otherString);
460 }
461 
462 //-----------------------------------------------------------------------------
463 inline bool UTF8StringView::operator== (UTF8StringView otherString) const
464 {
465  if (byteCount && otherString.byteCount && *byteCount != *otherString.byteCount)
466  return false;
467  return operator==(otherString.str);
468 }
469 
470 //-----------------------------------------------------------------------------
471 inline UTF8StringView::operator const UTF8StringPtr () const
472 {
473  return str;
474 }
475 
476 //-----------------------------------------------------------------------------
477 template<typename BaseIterator>
478 inline UTF8CodePointIterator<BaseIterator>& UTF8CodePointIterator<BaseIterator>::operator++ () noexcept
479 {
480  auto firstByte = *it;
481 
482  difference_type offset = 1;
483 
484  if (firstByte & kFirstBitMask)
485  {
486  if (firstByte & kThirdBitMask)
487  {
488  if (firstByte & kFourthBitMask)
489  offset = 4;
490  else
491  offset = 3;
492  }
493  else
494  {
495  offset = 2;
496  }
497  }
498  it += offset;
499  return *this;
500 }
501 
502 //-----------------------------------------------------------------------------
503 template<typename BaseIterator>
504 inline UTF8CodePointIterator<BaseIterator>& UTF8CodePointIterator<BaseIterator>::operator-- () noexcept
505 {
506  --it;
507  if (*it & kFirstBitMask)
508  {
509  --it;
510  if ((*it & kSecondBitMask) == 0)
511  {
512  --it;
513  if ((*it & kSecondBitMask) == 0)
514  {
515  --it;
516  }
517  }
518  }
519  return *this;
520 }
521 
522 //-----------------------------------------------------------------------------
523 template<typename BaseIterator>
524 inline UTF8CodePointIterator<BaseIterator> UTF8CodePointIterator<BaseIterator>::operator++ (int) noexcept
525 {
526  auto result = *this;
527  ++(*this);
528  return result;
529 }
530 
531 //-----------------------------------------------------------------------------
532 template<typename BaseIterator>
533 inline UTF8CodePointIterator<BaseIterator> UTF8CodePointIterator<BaseIterator>::operator-- (int) noexcept
534 {
535  auto result = *this;
536  --(*this);
537  return result;
538 }
539 
540 //-----------------------------------------------------------------------------
541 template<typename BaseIterator>
542 inline bool UTF8CodePointIterator<BaseIterator>::operator== (const UTF8CodePointIterator<BaseIterator>& other) const noexcept
543 {
544  return it == other.it;
545 }
546 
547 //-----------------------------------------------------------------------------
548 template<typename BaseIterator>
549 inline bool UTF8CodePointIterator<BaseIterator>::operator!= (const UTF8CodePointIterator<BaseIterator>& other) const noexcept
550 {
551  return it != other.it;
552 }
553 
554 //-----------------------------------------------------------------------------
555 template<typename BaseIterator>
556 inline typename UTF8CodePointIterator<BaseIterator>::CodePoint UTF8CodePointIterator<BaseIterator>::operator* () const noexcept
557 {
558  CodePoint codePoint = 0;
559 
560  auto firstByte = *it;
561 
562  if (firstByte & kFirstBitMask)
563  {
564  if (firstByte & kThirdBitMask)
565  {
566  if (firstByte & kFourthBitMask)
567  {
568  codePoint = static_cast<CodePoint> ((firstByte & 0x07) << 18);
569  auto secondByte = *(it + 1);
570  codePoint += static_cast<CodePoint> ((secondByte & 0x3f) << 12);
571  auto thirdByte = *(it + 2);
572  codePoint += static_cast<CodePoint> ((thirdByte & 0x3f) << 6);
573  auto fourthByte = *(it + 3);
574  codePoint += (fourthByte & 0x3f);
575  }
576  else
577  {
578  codePoint = static_cast<CodePoint> ((firstByte & 0x0f) << 12);
579  auto secondByte = *(it + 1);
580  codePoint += static_cast<CodePoint> ((secondByte & 0x3f) << 6);
581  auto thirdByte = *(it + 2);
582  codePoint += static_cast<CodePoint> ((thirdByte & 0x3f));
583  }
584  }
585  else
586  {
587  codePoint = static_cast<CodePoint> ((firstByte & 0x1f) << 6);
588  auto secondByte = *(it + 1);
589  codePoint += (secondByte & 0x3f);
590  }
591  }
592  else
593  {
594  codePoint = static_cast<CodePoint> (firstByte);
595  }
596  return codePoint;
597 }
598 
599 } // namespace
600 
601 #endif // __cstring__
bool contains(const UTF8StringPtr subString, bool ignoreCase=false) const
Definition: cstring.h:372
int64_t toInteger() const
Definition: cstring.h:428
size_t calculateCharacterCount() const
Definition: cstring.h:348
bool endsWith(const UTF8StringView &endString) const
Definition: cstring.h:401
Definition: cstring.h:223
double toDouble(uint32_t precision=8) const
Definition: cstring.h:411
size_t calculateByteCount() const
Definition: cstring.h:364
Definition: cstring.h:21
Definition: customcontrols.cpp:8
Definition: cstring.h:128
A view on a null-terminated UTF-8 string
Definition: cstring.h:172
bool startsWith(const UTF8StringView &startString) const
Definition: cstring.h:389
Holds a UTF-8 encoded string and a platform representation of it
Definition: cstring.h:56
Definition: optional.h:18
float toFloat(uint32_t precision=8) const
Definition: cstring.h:422