libosmscout 0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules
utf8helper Namespace Reference

Classes

struct  character
 
struct  Parser
 Parse and transform a UTF8 string. More...
 
class  UTF8String
 

Typedefs

using byte = uint8_t
 
using codepoint = uint32_t
 
using Transform = codepoint(*)(const character *, int context)
 Functor implementing the desired transformation of a character. It has 2 arguments:
 

Functions

static Parser::Exit _p0 (Parser *p, byte bb)
 
static Parser::Exit _p1_u2 (Parser *p, byte bb)
 1 byte: RFC 3629:#4: Valid UTF-8 matches the following syntax 00-7F
 
static Parser::Exit _p1_u3 (Parser *p, byte bb)
 
static Parser::Exit _p1_u4 (Parser *p, byte bb)
 
static Parser::Exit _p2_u3 (Parser *p, byte bb)
 
static Parser::Exit _p2_u4 (Parser *p, byte bb)
 
static Parser::Exit _p3_u4 (Parser *p, byte bb)
 
static int _u_size (codepoint u)
 
static char * _u_string (char *buf, codepoint u)
 
codepoint TransformCapitalize (const character *, int)
 
codepoint TransformLower (const character *, int)
 
codepoint TransformNop (const character *, int)
 
codepoint TransformNormalize (const character *, int)
 
codepoint TransformTransliterate (const character *, int)
 
codepoint TransformUpper (const character *, int)
 
std::string UTF8Capitalize (const std::string &text)
 
std::string UTF8Normalize (const std::string &text)
 
std::string UTF8ToLower (const std::string &text)
 
std::string UTF8ToUpper (const std::string &text)
 
std::string UTF8Transliterate (const std::string &text)
 

Variables

const character charmap_c2 []
 
const character charmap_c3 []
 
const character charmap_c4 []
 
const character charmap_c5 []
 
const character charmap_c6 []
 
const character charmap_c7 []
 
const character charmap_c8 []
 
const character charmap_c9 []
 
const character charmap_ca []
 
const character charmap_cb []
 
const character charmap_cc []
 
const character charmap_cd []
 
const character charmap_ce []
 
const character charmap_cf []
 
const character charmap_d0 []
 
const character charmap_d1 []
 
const character charmap_d2 []
 
const character charmap_d3 []
 
const character charmap_d4 []
 
const character charmap_d5 []
 
const character charmap_d6 []
 
const character charmap_e1_82 []
 
const character charmap_e1_83 []
 
const character charmap_e1_b8 []
 
const character charmap_e1_b9 []
 
const character charmap_e1_ba []
 
const character charmap_e1_bb []
 
const character charmap_e1_bc []
 
const character charmap_e1_bd []
 
const character charmap_e1_be []
 
const character charmap_e1_bf []
 
const character charmap_e2_80 []
 
const character charmap_e2_81 []
 
const character charmap_e2_82 []
 
const character charmap_e2_b4 []
 
const character charmap_f0_90_92 []
 
const character charmap_f0_90_93 []
 
const character charmap_f0_9e_a4 []
 
const character charmap_us7ascii []
 
constexpr int IsBreaker = 0x02
 
constexpr int IsControl = 0x04
 
constexpr int IsDiacritic = 0x10
 
constexpr int IsModifier = 0x08
 
constexpr int IsPunctuation = 0x20
 
constexpr int IsSpace = 0x01
 
constexpr int None = 0x00
 
constexpr codepoint NullCodepoint = 0
 
const character * pagemap_16 [32]
 
const character * pagemap_24_e1 []
 
const character * pagemap_24_e2 []
 
const character * pagemap_32_f0_90 []
 
const character * pagemap_32_f0_9e []
 

Typedef Documentation

◆ byte

using utf8helper::byte = uint8_t

◆ codepoint

using utf8helper::codepoint = uint32_t

◆ Transform

using utf8helper::Transform = codepoint (*)(const character*, int context)

Functor implementing the desired transformation of a character. It has 2 arguments:

  • The character struct matching the current code point
  • The context, i.e. the category of the preceding sequence.

It must return the new code point to be dumped instead, or NullCodepoint to discard the sequence.

Function Documentation

◆ _p0()

◆ _p1_u2()

static Parser::Exit utf8helper::_p1_u2 ( Parser * p,
byte  bb 
)
static

1 byte: RFC 3629:#4: Valid UTF-8 matches the following syntax 00-7F

2 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax C2-DF 80-BF

3 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax E0 A0-BF 80-BF / E1-EC 80-BF 80-BF / ED 80-9F 80-BF / EE-EF 80-BF 80-BF. RFC 3629:#6: [EF,BB,BF] is a BOM at the start, otherwise ZERO WIDTH NO-BREAK SPACE.

4 bytes: RFC 3629:#4: Valid UTF-8 matches the following syntax F0 90-BF 80-BF 80-BF / F1-F3 80-BF 80-BF 80-BF / F4 80-8F 80-BF 80-BF

References _p0(), _u_size(), utf8helper::Parser::b, utf8helper::character::category, utf8helper::Parser::context, utf8helper::Parser::Continue, utf8helper::Parser::Done, utf8helper::Parser::func, None, NullCodepoint, pagemap_16, utf8helper::Parser::run, utf8helper::Parser::u, and utf8helper::Parser::u_size.

Referenced by _p0().

◆ _p1_u3()

static Parser::Exit utf8helper::_p1_u3 ( Parser * p,
byte  bb 
)
static

◆ _p1_u4()

static Parser::Exit utf8helper::_p1_u4 ( Parser * p,
byte  bb 
)
static

◆ _p2_u3()

◆ _p2_u4()

static Parser::Exit utf8helper::_p2_u4 ( Parser * p,
byte  bb 
)
static

◆ _p3_u4()

◆ _u_size()

static int utf8helper::_u_size ( codepoint  u)
inline static

◆ _u_string()

◆ TransformCapitalize()

codepoint utf8helper::TransformCapitalize ( const character * ch,
int  context 
)
extern

◆ TransformLower()

codepoint utf8helper::TransformLower ( const character * ch,
int  context 
)
extern

References utf8helper::character::lower.

Referenced by UTF8ToLower().

◆ TransformNop()

codepoint utf8helper::TransformNop ( const character * ch,
int  context 
)
extern

◆ TransformNormalize()

codepoint utf8helper::TransformNormalize ( const character * ch,
int  context 
)
extern

◆ TransformTransliterate()

codepoint utf8helper::TransformTransliterate ( const character * ch,
int  context 
)
extern

◆ TransformUpper()

codepoint utf8helper::TransformUpper ( const character * ch,
int  context 
)
extern

References utf8helper::character::upper.

Referenced by UTF8ToUpper().

◆ UTF8Capitalize()

std::string utf8helper::UTF8Capitalize ( const std::string &  text)
extern

◆ UTF8Normalize()

std::string utf8helper::UTF8Normalize ( const std::string &  text)
extern

◆ UTF8ToLower()

std::string utf8helper::UTF8ToLower ( const std::string &  text)
extern

◆ UTF8ToUpper()

std::string utf8helper::UTF8ToUpper ( const std::string &  text)
extern

◆ UTF8Transliterate()

std::string utf8helper::UTF8Transliterate ( const std::string &  text)
extern

Variable Documentation

◆ charmap_c2

const character utf8helper::charmap_c2

◆ charmap_c3

const character utf8helper::charmap_c3

◆ charmap_c4

const character utf8helper::charmap_c4

◆ charmap_c5

const character utf8helper::charmap_c5

◆ charmap_c6

const character utf8helper::charmap_c6

◆ charmap_c7

const character utf8helper::charmap_c7

◆ charmap_c8

const character utf8helper::charmap_c8

◆ charmap_c9

const character utf8helper::charmap_c9

◆ charmap_ca

const character utf8helper::charmap_ca

◆ charmap_cb

const character utf8helper::charmap_cb

◆ charmap_cc

const character utf8helper::charmap_cc

◆ charmap_cd

const character utf8helper::charmap_cd

◆ charmap_ce

const character utf8helper::charmap_ce

◆ charmap_cf

const character utf8helper::charmap_cf

◆ charmap_d0

const character utf8helper::charmap_d0

◆ charmap_d1

const character utf8helper::charmap_d1

◆ charmap_d2

const character utf8helper::charmap_d2

◆ charmap_d3

const character utf8helper::charmap_d3

◆ charmap_d4

const character utf8helper::charmap_d4

◆ charmap_d5

const character utf8helper::charmap_d5

◆ charmap_d6

const character utf8helper::charmap_d6

◆ charmap_e1_82

const character utf8helper::charmap_e1_82

◆ charmap_e1_83

const character utf8helper::charmap_e1_83

◆ charmap_e1_b8

const character utf8helper::charmap_e1_b8

◆ charmap_e1_b9

const character utf8helper::charmap_e1_b9

◆ charmap_e1_ba

const character utf8helper::charmap_e1_ba

◆ charmap_e1_bb

const character utf8helper::charmap_e1_bb

◆ charmap_e1_bc

const character utf8helper::charmap_e1_bc

◆ charmap_e1_bd

const character utf8helper::charmap_e1_bd

◆ charmap_e1_be

const character utf8helper::charmap_e1_be

◆ charmap_e1_bf

const character utf8helper::charmap_e1_bf

◆ charmap_e2_80

const character utf8helper::charmap_e2_80

◆ charmap_e2_81

const character utf8helper::charmap_e2_81

◆ charmap_e2_82

const character utf8helper::charmap_e2_82

◆ charmap_e2_b4

const character utf8helper::charmap_e2_b4

◆ charmap_f0_90_92

const character utf8helper::charmap_f0_90_92

◆ charmap_f0_90_93

const character utf8helper::charmap_f0_90_93

◆ charmap_f0_9e_a4

const character utf8helper::charmap_f0_9e_a4

◆ charmap_us7ascii

const character utf8helper::charmap_us7ascii

Referenced by _p0().

◆ IsBreaker

constexpr int utf8helper::IsBreaker = 0x02
constexpr

◆ IsControl

constexpr int utf8helper::IsControl = 0x04
constexpr

◆ IsDiacritic

constexpr int utf8helper::IsDiacritic = 0x10
constexpr

◆ IsModifier

constexpr int utf8helper::IsModifier = 0x08
constexpr

◆ IsPunctuation

constexpr int utf8helper::IsPunctuation = 0x20
constexpr

◆ IsSpace

constexpr int utf8helper::IsSpace = 0x01
constexpr

◆ None

constexpr int utf8helper::None = 0x00
constexpr

Referenced by _p1_u2(), _p2_u3(), and _p3_u4().

◆ NullCodepoint

constexpr codepoint utf8helper::NullCodepoint = 0
constexpr

◆ pagemap_16

const character * utf8helper::pagemap_16
Initial value:
= {
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}
const character charmap_d4[]
Definition utf8helper_charmap.cpp:1370
const character charmap_c4[]
Definition utf8helper_charmap.cpp:298
const character charmap_ce[]
Definition utf8helper_charmap.cpp:968
const character charmap_c8[]
Definition utf8helper_charmap.cpp:566
const character charmap_d2[]
Definition utf8helper_charmap.cpp:1236
const character charmap_cf[]
Definition utf8helper_charmap.cpp:1035
const character charmap_c7[]
Definition utf8helper_charmap.cpp:499
const character charmap_d5[]
Definition utf8helper_charmap.cpp:1437
const character charmap_cd[]
Definition utf8helper_charmap.cpp:901
const character charmap_ca[]
Definition utf8helper_charmap.cpp:700
const character charmap_c5[]
Definition utf8helper_charmap.cpp:365
const character charmap_d1[]
Definition utf8helper_charmap.cpp:1169
const character charmap_d3[]
Definition utf8helper_charmap.cpp:1303
const character charmap_c9[]
Definition utf8helper_charmap.cpp:633
const character charmap_cc[]
Definition utf8helper_charmap.cpp:834
const character charmap_c2[]
Definition utf8helper_charmap.cpp:164
const character charmap_d0[]
Definition utf8helper_charmap.cpp:1102
const character charmap_d6[]
Definition utf8helper_charmap.cpp:1504
const character charmap_c3[]
Definition utf8helper_charmap.cpp:231
const character charmap_c6[]
Definition utf8helper_charmap.cpp:432
const character charmap_cb[]
Definition utf8helper_charmap.cpp:767

Referenced by _p1_u2().

◆ pagemap_24_e1

const character * utf8helper::pagemap_24_e1
Initial value:
= {
nullptr, nullptr, charmap_e1_82, charmap_e1_83, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}
const character charmap_e1_bc[]
Definition utf8helper_charmap.cpp:1985
const character charmap_e1_bd[]
Definition utf8helper_charmap.cpp:2052
const character charmap_e1_bb[]
Definition utf8helper_charmap.cpp:1918
const character charmap_e1_83[]
Definition utf8helper_charmap.cpp:1650
const character charmap_e1_be[]
Definition utf8helper_charmap.cpp:2119
const character charmap_e1_b8[]
Definition utf8helper_charmap.cpp:1717
const character charmap_e1_ba[]
Definition utf8helper_charmap.cpp:1851
const character charmap_e1_82[]
Definition utf8helper_charmap.cpp:1583
const character charmap_e1_bf[]
Definition utf8helper_charmap.cpp:2186
const character charmap_e1_b9[]
Definition utf8helper_charmap.cpp:1784

Referenced by _p2_u3().

◆ pagemap_24_e2

const character * utf8helper::pagemap_24_e2
Initial value:
= {
charmap_e2_80, charmap_e2_81, charmap_e2_82, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, charmap_e2_b4, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}
const character charmap_e2_80[]
Definition utf8helper_charmap.cpp:2265
const character charmap_e2_82[]
Definition utf8helper_charmap.cpp:2399
const character charmap_e2_b4[]
Definition utf8helper_charmap.cpp:2466
const character charmap_e2_81[]
Definition utf8helper_charmap.cpp:2332

Referenced by _p2_u3().

◆ pagemap_32_f0_90

const character * utf8helper::pagemap_32_f0_90
Initial value:
= {
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, charmap_f0_90_92, charmap_f0_90_93, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}
const character charmap_f0_90_93[]
Definition utf8helper_charmap.cpp:2612
const character charmap_f0_90_92[]
Definition utf8helper_charmap.cpp:2545

Referenced by _p3_u4().

◆ pagemap_32_f0_9e

const character * utf8helper::pagemap_32_f0_9e
Initial value:
= {
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, charmap_f0_9e_a4, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
}
const character charmap_f0_9e_a4[]
Definition utf8helper_charmap.cpp:2691

Referenced by _p3_u4().