libosmscout 0.1
Loading...
Searching...
No Matches
utf8helper Namespace Reference

Classes

struct  character
 
struct  Parser
 Parse and transform a UTF-8 string. More...
 
class  UTF8String
 

Typedefs

using byte = uint8_t
 
using codepoint = uint32_t
 
using Transform = codepoint(*)(const character *, int context)
 Functor that implements the desired transformation of a character. It takes 2 arguments:
 

Functions

static Parser::Exit _p0 (Parser *p, byte bb)
 
static Parser::Exit _p1_u2 (Parser *p, byte bb)
 1 byte: RFC 3629:#4: Valid UTF-8 matches the following syntax 00-7F
 
static Parser::Exit _p1_u3 (Parser *p, byte bb)
 
static Parser::Exit _p1_u4 (Parser *p, byte bb)
 
static Parser::Exit _p2_u3 (Parser *p, byte bb)
 
static Parser::Exit _p2_u4 (Parser *p, byte bb)
 
static Parser::Exit _p3_u4 (Parser *p, byte bb)
 
static int _u_size (codepoint u)
 
static char * _u_string (char *buf, codepoint u)
 
codepoint TransformCapitalize (const character *, int)
 
codepoint TransformLower (const character *, int)
 
codepoint TransformNop (const character *, int)
 
codepoint TransformNormalize (const character *, int)
 
codepoint TransformTransliterate (const character *, int)
 
codepoint TransformUpper (const character *, int)
 
std::string UTF8Capitalize (const std::string &text)
 
std::string UTF8Normalize (const std::string &text)
 
std::string UTF8ToLower (const std::string &text)
 
std::string UTF8ToUpper (const std::string &text)
 
std::string UTF8Transliterate (const std::string &text)
 

Variables

const character charmap_c2 []
 
const character charmap_c3 []
 
const character charmap_c4 []
 
const character charmap_c5 []
 
const character charmap_c6 []
 
const character charmap_c7 []
 
const character charmap_c8 []
 
const character charmap_c9 []
 
const character charmap_ca []
 
const character charmap_cb []
 
const character charmap_cc []
 
const character charmap_cd []
 
const character charmap_ce []
 
const character charmap_cf []
 
const character charmap_d0 []
 
const character charmap_d1 []
 
const character charmap_d2 []
 
const character charmap_d3 []
 
const character charmap_d4 []
 
const character charmap_d5 []
 
const character charmap_d6 []
 
const character charmap_e1_82 []
 
const character charmap_e1_83 []
 
const character charmap_e1_b8 []
 
const character charmap_e1_b9 []
 
const character charmap_e1_ba []
 
const character charmap_e1_bb []
 
const character charmap_e1_bc []
 
const character charmap_e1_bd []
 
const character charmap_e1_be []
 
const character charmap_e1_bf []
 
const character charmap_e2_80 []
 
const character charmap_e2_81 []
 
const character charmap_e2_82 []
 
const character charmap_e2_b4 []
 
const character charmap_f0_90_92 []
 
const character charmap_f0_90_93 []
 
const character charmap_f0_9e_a4 []
 
const character charmap_us7ascii []
 
constexpr int IsBreaker = 0x02
 
constexpr int IsControl = 0x04
 
constexpr int IsDiacritic = 0x10
 
constexpr int IsModifier = 0x08
 
constexpr int IsPunctuation = 0x20
 
constexpr int IsSpace = 0x01
 
constexpr int None = 0x00
 
constexpr codepoint NullCodepoint = 0
 
const character * pagemap_16 [32]
 
const character * pagemap_24_e1 []
 
const character * pagemap_24_e2 []
 
const character * pagemap_32_f0_90 []
 
const character * pagemap_32_f0_9e []
 

Typedef Documentation

◆ byte

using utf8helper::byte = uint8_t

◆ codepoint

using utf8helper::codepoint = uint32_t

◆ Transform

using utf8helper::Transform = codepoint (*)(const character*, int context)

Functor that implements the desired transformation of a character. It takes 2 arguments:

  • The character struct matching the current code point.
  • The context, which is the category of the preceding sequence.

It must return the new code point to be dumped instead, or NullCodepoint to discard the sequence.

Function Documentation

◆ _p0()

◆ _p1_u2()

static Parser::Exit utf8helper::_p1_u2 ( Parser * p,
byte  bb 
)
static

1 byte: RFC 3629:#4: Valid UTF-8 matches the following syntax 00-7F

2 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax C2-DF 80-BF

3 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax: E0 A0-BF 80-BF / E1-EC 80-BF 80-BF / ED 80-9F 80-BF / EE-EF 80-BF 80-BF. RFC 3629:#6: [EF,BB,BF] is the BOM at the start of the stream, otherwise ZERO WIDTH NO-BREAK SPACE.

4 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax: F0 90-BF 80-BF 80-BF / F1-F3 80-BF 80-BF 80-BF / F4 80-8F 80-BF 80-BF

References _p0(), _u_size(), utf8helper::Parser::b, utf8helper::character::category, utf8helper::Parser::context, utf8helper::Parser::Continue, utf8helper::Parser::Done, utf8helper::Parser::func, None, NullCodepoint, pagemap_16, utf8helper::Parser::run, utf8helper::Parser::u, and utf8helper::Parser::u_size.

Referenced by _p0().

◆ _p1_u3()

static Parser::Exit utf8helper::_p1_u3 ( Parser * p,
byte  bb 
)
static

◆ _p1_u4()

static Parser::Exit utf8helper::_p1_u4 ( Parser * p,
byte  bb 
)
static

◆ _p2_u3()

◆ _p2_u4()

static Parser::Exit utf8helper::_p2_u4 ( Parser * p,
byte  bb 
)
static

◆ _p3_u4()

◆ _u_size()

static int utf8helper::_u_size ( codepoint  u)
inline static

◆ _u_string()

◆ TransformCapitalize()

codepoint utf8helper::TransformCapitalize ( const character * ch,
int  context 
)
extern

◆ TransformLower()

codepoint utf8helper::TransformLower ( const character * ch,
int  context 
)
extern

References utf8helper::character::lower.

Referenced by UTF8ToLower().

◆ TransformNop()

codepoint utf8helper::TransformNop ( const character * ch,
int  context 
)
extern

◆ TransformNormalize()

codepoint utf8helper::TransformNormalize ( const character * ch,
int  context 
)
extern

◆ TransformTransliterate()

codepoint utf8helper::TransformTransliterate ( const character * ch,
int  context 
)
extern

◆ TransformUpper()

codepoint utf8helper::TransformUpper ( const character * ch,
int  context 
)
extern

References utf8helper::character::upper.

Referenced by UTF8ToUpper().

◆ UTF8Capitalize()

std::string utf8helper::UTF8Capitalize ( const std::string &  text)
extern

◆ UTF8Normalize()

std::string utf8helper::UTF8Normalize ( const std::string &  text)
extern

◆ UTF8ToLower()

std::string utf8helper::UTF8ToLower ( const std::string &  text)
extern

◆ UTF8ToUpper()

std::string utf8helper::UTF8ToUpper ( const std::string &  text)
extern

◆ UTF8Transliterate()

std::string utf8helper::UTF8Transliterate ( const std::string &  text)
extern

Variable Documentation

◆ charmap_c2

const character utf8helper::charmap_c2

◆ charmap_c3

const character utf8helper::charmap_c3

◆ charmap_c4

const character utf8helper::charmap_c4

◆ charmap_c5

const character utf8helper::charmap_c5

◆ charmap_c6

const character utf8helper::charmap_c6

◆ charmap_c7

const character utf8helper::charmap_c7

◆ charmap_c8

const character utf8helper::charmap_c8

◆ charmap_c9

const character utf8helper::charmap_c9

◆ charmap_ca

const character utf8helper::charmap_ca

◆ charmap_cb

const character utf8helper::charmap_cb

◆ charmap_cc

const character utf8helper::charmap_cc

◆ charmap_cd

const character utf8helper::charmap_cd

◆ charmap_ce

const character utf8helper::charmap_ce

◆ charmap_cf

const character utf8helper::charmap_cf

◆ charmap_d0

const character utf8helper::charmap_d0

◆ charmap_d1

const character utf8helper::charmap_d1

◆ charmap_d2

const character utf8helper::charmap_d2

◆ charmap_d3

const character utf8helper::charmap_d3

◆ charmap_d4

const character utf8helper::charmap_d4

◆ charmap_d5

const character utf8helper::charmap_d5

◆ charmap_d6

const character utf8helper::charmap_d6

◆ charmap_e1_82

const character utf8helper::charmap_e1_82

◆ charmap_e1_83

const character utf8helper::charmap_e1_83

◆ charmap_e1_b8

const character utf8helper::charmap_e1_b8

◆ charmap_e1_b9

const character utf8helper::charmap_e1_b9

◆ charmap_e1_ba

const character utf8helper::charmap_e1_ba

◆ charmap_e1_bb

const character utf8helper::charmap_e1_bb

◆ charmap_e1_bc

const character utf8helper::charmap_e1_bc

◆ charmap_e1_bd

const character utf8helper::charmap_e1_bd

◆ charmap_e1_be

const character utf8helper::charmap_e1_be

◆ charmap_e1_bf

const character utf8helper::charmap_e1_bf

◆ charmap_e2_80

const character utf8helper::charmap_e2_80

◆ charmap_e2_81

const character utf8helper::charmap_e2_81

◆ charmap_e2_82

const character utf8helper::charmap_e2_82

◆ charmap_e2_b4

const character utf8helper::charmap_e2_b4

◆ charmap_f0_90_92

const character utf8helper::charmap_f0_90_92

◆ charmap_f0_90_93

const character utf8helper::charmap_f0_90_93

◆ charmap_f0_9e_a4

const character utf8helper::charmap_f0_9e_a4

◆ charmap_us7ascii

const character utf8helper::charmap_us7ascii

Referenced by _p0().

◆ IsBreaker

constexpr int utf8helper::IsBreaker = 0x02
constexpr

◆ IsControl

constexpr int utf8helper::IsControl = 0x04
constexpr

◆ IsDiacritic

constexpr int utf8helper::IsDiacritic = 0x10
constexpr

◆ IsModifier

constexpr int utf8helper::IsModifier = 0x08
constexpr

◆ IsPunctuation

constexpr int utf8helper::IsPunctuation = 0x20
constexpr

◆ IsSpace

constexpr int utf8helper::IsSpace = 0x01
constexpr

◆ None

constexpr int utf8helper::None = 0x00
constexpr

Referenced by _p1_u2(), _p2_u3(), and _p3_u4().

◆ NullCodepoint

constexpr codepoint utf8helper::NullCodepoint = 0
constexpr

◆ pagemap_16

const character * utf8helper::pagemap_16
Initial value:
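= {
nullptr, nullptr, charmap_c2, charmap_c3, charmap_c4, charmap_c5, charmap_c6, charmap_c7,
charmap_c8, charmap_c9, charmap_ca, charmap_cb, charmap_cc, charmap_cd, charmap_ce, charmap_cf,
charmap_d0, charmap_d1, charmap_d2, charmap_d3, charmap_d4, charmap_d5, charmap_d6, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}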
const character charmap_d4[]
Definition utf8helper_charmap.cpp:1370
const character charmap_c4[]
Definition utf8helper_charmap.cpp:298
const character charmap_ce[]
Definition utf8helper_charmap.cpp:968
const character charmap_c8[]
Definition utf8helper_charmap.cpp:566
const character charmap_d2[]
Definition utf8helper_charmap.cpp:1236
const character charmap_cf[]
Definition utf8helper_charmap.cpp:1035
const character charmap_c7[]
Definition utf8helper_charmap.cpp:499
const character charmap_d5[]
Definition utf8helper_charmap.cpp:1437
const character charmap_cd[]
Definition utf8helper_charmap.cpp:901
const character charmap_ca[]
Definition utf8helper_charmap.cpp:700
const character charmap_c5[]
Definition utf8helper_charmap.cpp:365
const character charmap_d1[]
Definition utf8helper_charmap.cpp:1169
const character charmap_d3[]
Definition utf8helper_charmap.cpp:1303
const character charmap_c9[]
Definition utf8helper_charmap.cpp:633
const character charmap_cc[]
Definition utf8helper_charmap.cpp:834
const character charmap_c2[]
Definition utf8helper_charmap.cpp:164
const character charmap_d0[]
Definition utf8helper_charmap.cpp:1102
const character charmap_d6[]
Definition utf8helper_charmap.cpp:1504
const character charmap_c3[]
Definition utf8helper_charmap.cpp:231
const character charmap_c6[]
Definition utf8helper_charmap.cpp:432
const character charmap_cb[]
Definition utf8helper_charmap.cpp:767

Referenced by _p1_u2().

◆ pagemap_24_e1

const character * utf8helper::pagemap_24_e1
Initial value:
= {
nullptr, nullptr, charmap_e1_82, charmap_e1_83, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
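nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
charmap_e1_b8, charmap_e1_b9, charmap_e1_ba, charmap_e1_bb, charmap_e1_bc, charmap_e1_bd, charmap_e1_be, charmap_e1_bf,
}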
const character charmap_e1_bc[]
Definition utf8helper_charmap.cpp:1985
const character charmap_e1_bd[]
Definition utf8helper_charmap.cpp:2052
const character charmap_e1_bb[]
Definition utf8helper_charmap.cpp:1918
const character charmap_e1_83[]
Definition utf8helper_charmap.cpp:1650
const character charmap_e1_be[]
Definition utf8helper_charmap.cpp:2119
const character charmap_e1_b8[]
Definition utf8helper_charmap.cpp:1717
const character charmap_e1_ba[]
Definition utf8helper_charmap.cpp:1851
const character charmap_e1_82[]
Definition utf8helper_charmap.cpp:1583
const character charmap_e1_bf[]
Definition utf8helper_charmap.cpp:2186
const character charmap_e1_b9[]
Definition utf8helper_charmap.cpp:1784

Referenced by _p2_u3().

◆ pagemap_24_e2

const character * utf8helper::pagemap_24_e2
Initial value:
= {
charmap_e2_80, charmap_e2_81, charmap_e2_82, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, charmap_e2_b4, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}
const character charmap_e2_80[]
Definition utf8helper_charmap.cpp:2265
const character charmap_e2_82[]
Definition utf8helper_charmap.cpp:2399
const character charmap_e2_b4[]
Definition utf8helper_charmap.cpp:2466
const character charmap_e2_81[]
Definition utf8helper_charmap.cpp:2332

Referenced by _p2_u3().

◆ pagemap_32_f0_90

const character * utf8helper::pagemap_32_f0_90
Initial value:
= {
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, charmap_f0_90_92, charmap_f0_90_93, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}
const character charmap_f0_90_93[]
Definition utf8helper_charmap.cpp:2612
const character charmap_f0_90_92[]
Definition utf8helper_charmap.cpp:2545

Referenced by _p3_u4().

◆ pagemap_32_f0_9e

const character * utf8helper::pagemap_32_f0_9e
Initial value:
= {
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, charmap_f0_9e_a4, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}
const character charmap_f0_9e_a4[]
Definition utf8helper_charmap.cpp:2691

Referenced by _p3_u4().