Cbeam
Loading...
Searching...
No Matches
utf8.hpp
Go to the documentation of this file.
1/*
2Copyright (c) 2025 acrion innovations GmbH
3Authors: Stefan Zipproth, s.zipproth@acrion.ch
4
5This file is part of Cbeam, see https://github.com/acrion/cbeam and https://cbeam.org
6
7Cbeam is offered under a commercial and under the AGPL license.
8For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
9
10AGPL licensing:
11
12Cbeam is free software: you can redistribute it and/or modify
13it under the terms of the GNU Affero General Public License as published by
14the Free Software Foundation, either version 3 of the License, or
15(at your option) any later version.
16
17Cbeam is distributed in the hope that it will be useful,
18but WITHOUT ANY WARRANTY; without even the implied warranty of
19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20GNU Affero General Public License for more details.
21
22You should have received a copy of the GNU Affero General Public License
23along with Cbeam. If not, see <https://www.gnu.org/licenses/>.
24*/
25
26#pragma once
27
28#include <cstddef> // for std::size_t
29
30#include <string> // for std::basic_string, std::string
31
33{
/**
 * @brief Returns whether the given string is well-formed UTF-8.
 *
 * The byte sequence is checked against the well-formed UTF-8 byte sequences
 * defined by the Unicode Standard (Table 3-7) and RFC 3629. The following
 * are rejected:
 *  - stray continuation bytes and the lead bytes 0xC0, 0xC1 and 0xF5..0xFF,
 *  - truncated multi-byte sequences,
 *  - overlong encodings (0xE0 0x80..0x9F and 0xF0 0x80..0x8F),
 *  - UTF-16 surrogates U+D800..U+DFFF (0xED 0xA0..0xBF),
 *  - code points above U+10FFFF (0xF4 0x90..0xBF).
 *
 * @param s The byte string to validate. An empty string is considered valid.
 * @return true if every byte of @p s belongs to a well-formed UTF-8 sequence,
 *         false otherwise.
 */
inline bool is_valid_utf8(const std::string& s)
{
    // True if the byte at position `pos` lies within the closed range [lo, hi].
    const auto byte_in_range = [&s](std::size_t pos, unsigned char lo, unsigned char hi)
    {
        const auto b = static_cast<unsigned char>(s[pos]);
        return b >= lo && b <= hi;
    };

    const std::size_t len = s.size();
    std::size_t       i   = 0;

    while (i < len)
    {
        const auto lead = static_cast<unsigned char>(s[i]);

        if (lead < 0x80)
        {
            ++i; // plain 7-bit ASCII
            continue;
        }

        std::size_t   trailing = 0;    // number of continuation bytes that must follow
        unsigned char first_lo = 0x80; // allowed range of the FIRST continuation byte;
        unsigned char first_hi = 0xBF; // narrowed below for the special lead bytes

        if (lead >= 0xC2 && lead <= 0xDF)
        {
            trailing = 1;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            trailing = 2;
            if (lead == 0xE0)
            {
                first_lo = 0xA0; // below 0xA0 would be an overlong encoding of U+0000..U+07FF
            }
            else if (lead == 0xED)
            {
                first_hi = 0x9F; // above 0x9F would encode a surrogate U+D800..U+DFFF
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            trailing = 3;
            if (lead == 0xF0)
            {
                first_lo = 0x90; // below 0x90 would be an overlong encoding of U+0000..U+FFFF
            }
            else if (lead == 0xF4)
            {
                first_hi = 0x8F; // above 0x8F would exceed U+10FFFF
            }
        }
        else
        {
            // Stray continuation byte (0x80..0xBF) or a lead byte that never
            // occurs in well-formed UTF-8 (0xC0, 0xC1, 0xF5..0xFF).
            return false;
        }

        // The sequence must not be cut off by the end of the string.
        if (len - i <= trailing)
        {
            return false;
        }

        if (!byte_in_range(i + 1, first_lo, first_hi))
        {
            return false;
        }

        // Any remaining continuation bytes use the generic 10xxxxxx pattern.
        for (std::size_t k = 2; k <= trailing; ++k)
        {
            if (!byte_in_range(i + k, 0x80, 0xBF))
            {
                return false;
            }
        }

        i += trailing + 1;
    }

    return true;
}
67
75 inline bool has_utf8_specific_encoding(const std::string& s)
76 {
77 bool all_ascii = true;
78 for (unsigned char c : s)
79 {
80 if (c >= 128)
81 {
82 all_ascii = false;
83 break;
84 }
85 }
86 if (all_ascii) return false;
87
88 return is_valid_utf8(s);
89 }
90}
Focuses on UTF-8 checks, character handling, and encoding-specific validations. It includes lightweig...
Definition utf8.hpp:33
bool has_utf8_specific_encoding(const std::string &s)
Checks if the given string uses encoding that is specific to UTF-8.
Definition utf8.hpp:75
bool is_valid_utf8(const std::string &s)
Returns whether the given string is valid UTF-8.
Definition utf8.hpp:35