libosmscout  0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules
utf8helper Namespace Reference

Classes

struct  character
 
struct  Parser
 Parse and transform a UTF-8 string. More...
 
class  UTF8String
 

Typedefs

using byte = uint8_t
 
using codepoint = uint32_t
 
using Transform = codepoint(*)(const character *, int context)
 Functor that implements the desired transformation of a character. It has 2 arguments: More...
 

Functions

static Parser::Exit _p0 (Parser *p, byte bb)
 
static Parser::Exit _p1_u2 (Parser *p, byte bb)
 1 byte: RFC 3629:#4: Valid UTF-8 matches the following syntax 00-7F More...
 
static Parser::Exit _p1_u3 (Parser *p, byte bb)
 
static Parser::Exit _p1_u4 (Parser *p, byte bb)
 
static Parser::Exit _p2_u3 (Parser *p, byte bb)
 
static Parser::Exit _p2_u4 (Parser *p, byte bb)
 
static Parser::Exit _p3_u4 (Parser *p, byte bb)
 
static int _u_size (codepoint u)
 
static char * _u_string (char *buf, codepoint u)
 
codepoint TransformCapitalize (const character *, int)
 
codepoint TransformLower (const character *, int)
 
codepoint TransformLower (const character *ch, [[maybe_unused]] int context)
 
codepoint TransformNop (const character *, int)
 
codepoint TransformNormalize (const character *, int)
 
codepoint TransformTransliterate (const character *, int)
 
codepoint TransformUpper (const character *, int)
 
codepoint TransformUpper (const character *ch, [[maybe_unused]] int context)
 
std::string UTF8Capitalize (const std::string &text)
 
std::string UTF8Normalize (const std::string &text)
 
std::string UTF8ToLower (const std::string &text)
 
std::string UTF8ToUpper (const std::string &text)
 
std::string UTF8Transliterate (const std::string &text)
 

Variables

const character charmap_c2 []
 
const character charmap_c3 []
 
const character charmap_c4 []
 
const character charmap_c5 []
 
const character charmap_c6 []
 
const character charmap_c7 []
 
const character charmap_c8 []
 
const character charmap_c9 []
 
const character charmap_ca []
 
const character charmap_cb []
 
const character charmap_cc []
 
const character charmap_cd []
 
const character charmap_ce []
 
const character charmap_cf []
 
const character charmap_d0 []
 
const character charmap_d1 []
 
const character charmap_d2 []
 
const character charmap_d3 []
 
const character charmap_d4 []
 
const character charmap_d5 []
 
const character charmap_d6 []
 
const character charmap_e1_82 []
 
const character charmap_e1_83 []
 
const character charmap_e1_b8 []
 
const character charmap_e1_b9 []
 
const character charmap_e1_ba []
 
const character charmap_e1_bb []
 
const character charmap_e1_bc []
 
const character charmap_e1_bd []
 
const character charmap_e1_be []
 
const character charmap_e1_bf []
 
const character charmap_e2_80 []
 
const character charmap_e2_81 []
 
const character charmap_e2_82 []
 
const character charmap_e2_b4 []
 
const character charmap_f0_90_92 []
 
const character charmap_f0_90_93 []
 
const character charmap_f0_9e_a4 []
 
const character charmap_us7ascii []
 
constexpr int IsBreaker = 0x02
 
constexpr int IsControl = 0x04
 
constexpr int IsDiacritic = 0x10
 
constexpr int IsModifier = 0x08
 
constexpr int IsPunctuation = 0x20
 
constexpr int IsSpace = 0x01
 
constexpr int None = 0x00
 
constexpr codepoint NullCodepoint = 0
 
const character * pagemap_16 [32]
 
const character * pagemap_24_e1 []
 
const character * pagemap_24_e2 []
 
const character * pagemap_32_f0_90 []
 
const character * pagemap_32_f0_9e []
 

Typedef Documentation

◆ byte

using utf8helper::byte = uint8_t

◆ codepoint

using utf8helper::codepoint = uint32_t

◆ Transform

using utf8helper::Transform = codepoint (*)(const character*, int context)

Functor that implements the desired transformation of a character. It has 2 arguments:

  • The character struct matching the current code point
  • The context, which is the category of the preceding sequence.

It must return the new code point to be emitted instead, or NullCodepoint to discard the sequence.

Function Documentation

◆ _p0()

◆ _p1_u2()

static Parser::Exit utf8helper::_p1_u2 ( Parser * p,
byte  bb 
)
static

1 byte: RFC 3629:#4: Valid UTF-8 matches the following syntax 00-7F

2 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax C2-DF 80-BF

3 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax E0 A0-BF 80-BF / E1-EC 80-BF 80-BF / ED 80-9F 80-BF / EE-EF 80-BF 80-BF. RFC 3629:#6: [EF,BB,BF] is BOM on start, else ZERO WIDTH NO-BREAK SPACE

4 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax F0 90-BF 80-BF 80-BF / F1-F3 80-BF 80-BF 80-BF / F4 80-8F 80-BF 80-BF

References _p0(), _u_size(), utf8helper::Parser::b, utf8helper::character::category, utf8helper::Parser::context, utf8helper::Parser::Continue, utf8helper::Parser::Done, utf8helper::Parser::func, None, NullCodepoint, pagemap_16, utf8helper::Parser::run, utf8helper::Parser::u, and utf8helper::Parser::u_size.

Referenced by _p0().

◆ _p1_u3()

static Parser::Exit utf8helper::_p1_u3 ( Parser * p,
byte  bb 
)
static

◆ _p1_u4()

static Parser::Exit utf8helper::_p1_u4 ( Parser * p,
byte  bb 
)
static

◆ _p2_u3()

◆ _p2_u4()

static Parser::Exit utf8helper::_p2_u4 ( Parser * p,
byte  bb 
)
static

◆ _p3_u4()

◆ _u_size()

static int utf8helper::_u_size ( codepoint  u)
inline static

◆ _u_string()

◆ TransformCapitalize()

codepoint utf8helper::TransformCapitalize ( const character * ch,
int  context 
)

◆ TransformLower() [1/2]

codepoint utf8helper::TransformLower ( const character * ,
int   
)

Referenced by UTF8ToLower().

◆ TransformLower() [2/2]

codepoint utf8helper::TransformLower ( const character * ch,
[[maybe_unused]] int  context 
)

◆ TransformNop()

codepoint utf8helper::TransformNop ( const character * ch,
int  context 
)

◆ TransformNormalize()

codepoint utf8helper::TransformNormalize ( const character * ch,
int  context 
)

◆ TransformTransliterate()

codepoint utf8helper::TransformTransliterate ( const character * ch,
int  context 
)

◆ TransformUpper() [1/2]

codepoint utf8helper::TransformUpper ( const character * ,
int   
)

Referenced by UTF8ToUpper().

◆ TransformUpper() [2/2]

codepoint utf8helper::TransformUpper ( const character * ch,
[[maybe_unused]] int  context 
)

◆ UTF8Capitalize()

std::string utf8helper::UTF8Capitalize ( const std::string &  text)

◆ UTF8Normalize()

std::string utf8helper::UTF8Normalize ( const std::string &  text)

◆ UTF8ToLower()

std::string utf8helper::UTF8ToLower ( const std::string &  text)

◆ UTF8ToUpper()

std::string utf8helper::UTF8ToUpper ( const std::string &  text)

◆ UTF8Transliterate()

std::string utf8helper::UTF8Transliterate ( const std::string &  text)

Variable Documentation

◆ charmap_c2

const character utf8helper::charmap_c2

◆ charmap_c3

const character utf8helper::charmap_c3

◆ charmap_c4

const character utf8helper::charmap_c4

◆ charmap_c5

const character utf8helper::charmap_c5

◆ charmap_c6

const character utf8helper::charmap_c6

◆ charmap_c7

const character utf8helper::charmap_c7

◆ charmap_c8

const character utf8helper::charmap_c8

◆ charmap_c9

const character utf8helper::charmap_c9

◆ charmap_ca

const character utf8helper::charmap_ca

◆ charmap_cb

const character utf8helper::charmap_cb

◆ charmap_cc

const character utf8helper::charmap_cc

◆ charmap_cd

const character utf8helper::charmap_cd

◆ charmap_ce

const character utf8helper::charmap_ce

◆ charmap_cf

const character utf8helper::charmap_cf

◆ charmap_d0

const character utf8helper::charmap_d0

◆ charmap_d1

const character utf8helper::charmap_d1

◆ charmap_d2

const character utf8helper::charmap_d2

◆ charmap_d3

const character utf8helper::charmap_d3

◆ charmap_d4

const character utf8helper::charmap_d4

◆ charmap_d5

const character utf8helper::charmap_d5

◆ charmap_d6

const character utf8helper::charmap_d6

◆ charmap_e1_82

const character utf8helper::charmap_e1_82

◆ charmap_e1_83

const character utf8helper::charmap_e1_83

◆ charmap_e1_b8

const character utf8helper::charmap_e1_b8

◆ charmap_e1_b9

const character utf8helper::charmap_e1_b9

◆ charmap_e1_ba

const character utf8helper::charmap_e1_ba

◆ charmap_e1_bb

const character utf8helper::charmap_e1_bb

◆ charmap_e1_bc

const character utf8helper::charmap_e1_bc

◆ charmap_e1_bd

const character utf8helper::charmap_e1_bd

◆ charmap_e1_be

const character utf8helper::charmap_e1_be

◆ charmap_e1_bf

const character utf8helper::charmap_e1_bf

◆ charmap_e2_80

const character utf8helper::charmap_e2_80

◆ charmap_e2_81

const character utf8helper::charmap_e2_81

◆ charmap_e2_82

const character utf8helper::charmap_e2_82

◆ charmap_e2_b4

const character utf8helper::charmap_e2_b4

◆ charmap_f0_90_92

const character utf8helper::charmap_f0_90_92

◆ charmap_f0_90_93

const character utf8helper::charmap_f0_90_93

◆ charmap_f0_9e_a4

const character utf8helper::charmap_f0_9e_a4

◆ charmap_us7ascii

const character utf8helper::charmap_us7ascii

Referenced by _p0().

◆ IsBreaker

constexpr int utf8helper::IsBreaker = 0x02
constexpr

◆ IsControl

constexpr int utf8helper::IsControl = 0x04
constexpr

◆ IsDiacritic

constexpr int utf8helper::IsDiacritic = 0x10
constexpr

◆ IsModifier

constexpr int utf8helper::IsModifier = 0x08
constexpr

◆ IsPunctuation

constexpr int utf8helper::IsPunctuation = 0x20
constexpr

◆ IsSpace

constexpr int utf8helper::IsSpace = 0x01
constexpr

◆ None

constexpr int utf8helper::None = 0x00
constexpr

Referenced by _p1_u2(), _p2_u3(), and _p3_u4().

◆ NullCodepoint

constexpr codepoint utf8helper::NullCodepoint = 0
constexpr

◆ pagemap_16

const character * utf8helper::pagemap_16
Initial value:
= {
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}

Referenced by _p1_u2().

◆ pagemap_24_e1

const character * utf8helper::pagemap_24_e1
Initial value:
= {
nullptr, nullptr, charmap_e1_82, charmap_e1_83, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}

Referenced by _p2_u3().

◆ pagemap_24_e2

const character * utf8helper::pagemap_24_e2
Initial value:
= {
charmap_e2_80, charmap_e2_81, charmap_e2_82, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, charmap_e2_b4, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}

Referenced by _p2_u3().

◆ pagemap_32_f0_90

const character * utf8helper::pagemap_32_f0_90
Initial value:
= {
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, charmap_f0_90_92, charmap_f0_90_93, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}

Referenced by _p3_u4().

◆ pagemap_32_f0_9e

const character * utf8helper::pagemap_32_f0_9e
Initial value:
= {
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, charmap_f0_9e_a4, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}

Referenced by _p3_u4().

utf8helper::charmap_e2_81
const character charmap_e2_81[]
Definition: utf8helper_charmap.cpp:2332
utf8helper::charmap_c9
const character charmap_c9[]
Definition: utf8helper_charmap.cpp:633
utf8helper::charmap_e1_bd
const character charmap_e1_bd[]
Definition: utf8helper_charmap.cpp:2052
utf8helper::charmap_c4
const character charmap_c4[]
Definition: utf8helper_charmap.cpp:298
utf8helper::charmap_cd
const character charmap_cd[]
Definition: utf8helper_charmap.cpp:901
utf8helper::charmap_f0_90_92
const character charmap_f0_90_92[]
Definition: utf8helper_charmap.cpp:2545
utf8helper::charmap_e1_ba
const character charmap_e1_ba[]
Definition: utf8helper_charmap.cpp:1851
utf8helper::charmap_d5
const character charmap_d5[]
Definition: utf8helper_charmap.cpp:1437
utf8helper::charmap_d0
const character charmap_d0[]
Definition: utf8helper_charmap.cpp:1102
utf8helper::charmap_d3
const character charmap_d3[]
Definition: utf8helper_charmap.cpp:1303
utf8helper::charmap_c3
const character charmap_c3[]
Definition: utf8helper_charmap.cpp:231
utf8helper::charmap_e1_be
const character charmap_e1_be[]
Definition: utf8helper_charmap.cpp:2119
utf8helper::charmap_d4
const character charmap_d4[]
Definition: utf8helper_charmap.cpp:1370
utf8helper::charmap_e2_82
const character charmap_e2_82[]
Definition: utf8helper_charmap.cpp:2399
utf8helper::charmap_cf
const character charmap_cf[]
Definition: utf8helper_charmap.cpp:1035
utf8helper::charmap_e2_b4
const character charmap_e2_b4[]
Definition: utf8helper_charmap.cpp:2466
utf8helper::charmap_e1_b9
const character charmap_e1_b9[]
Definition: utf8helper_charmap.cpp:1784
utf8helper::charmap_f0_90_93
const character charmap_f0_90_93[]
Definition: utf8helper_charmap.cpp:2612
utf8helper::charmap_e1_82
const character charmap_e1_82[]
Definition: utf8helper_charmap.cpp:1583
utf8helper::charmap_c2
const character charmap_c2[]
Definition: utf8helper_charmap.cpp:164
utf8helper::charmap_e1_83
const character charmap_e1_83[]
Definition: utf8helper_charmap.cpp:1650
utf8helper::charmap_d1
const character charmap_d1[]
Definition: utf8helper_charmap.cpp:1169
utf8helper::charmap_e1_bf
const character charmap_e1_bf[]
Definition: utf8helper_charmap.cpp:2186
utf8helper::charmap_e1_bc
const character charmap_e1_bc[]
Definition: utf8helper_charmap.cpp:1985
utf8helper::charmap_cc
const character charmap_cc[]
Definition: utf8helper_charmap.cpp:834
utf8helper::charmap_c6
const character charmap_c6[]
Definition: utf8helper_charmap.cpp:432
utf8helper::charmap_ce
const character charmap_ce[]
Definition: utf8helper_charmap.cpp:968
utf8helper::charmap_c7
const character charmap_c7[]
Definition: utf8helper_charmap.cpp:499
utf8helper::charmap_f0_9e_a4
const character charmap_f0_9e_a4[]
Definition: utf8helper_charmap.cpp:2691
utf8helper::charmap_e2_80
const character charmap_e2_80[]
Definition: utf8helper_charmap.cpp:2265
utf8helper::charmap_e1_b8
const character charmap_e1_b8[]
Definition: utf8helper_charmap.cpp:1717
utf8helper::charmap_ca
const character charmap_ca[]
Definition: utf8helper_charmap.cpp:700
utf8helper::charmap_e1_bb
const character charmap_e1_bb[]
Definition: utf8helper_charmap.cpp:1918
utf8helper::charmap_d6
const character charmap_d6[]
Definition: utf8helper_charmap.cpp:1504
utf8helper::charmap_cb
const character charmap_cb[]
Definition: utf8helper_charmap.cpp:767
utf8helper::charmap_c8
const character charmap_c8[]
Definition: utf8helper_charmap.cpp:566
utf8helper::charmap_c5
const character charmap_c5[]
Definition: utf8helper_charmap.cpp:365
utf8helper::charmap_d2
const character charmap_d2[]
Definition: utf8helper_charmap.cpp:1236