unicode::bidi, unicode::bidi_calc, unicode::bidi_calc_types, unicode::bidi_reorder, unicode::bidi_cleanup, unicode::bidi_logical_order, unicode::bidi_combinings, unicode::bidi_needs_embed, unicode::bidi_embed, unicode::bidi_embed_paragraph_level, unicode::bidi_get_direction, unicode::bidi_override — unicode bi-directional algorithm
#include <courier-unicode.h>
struct unicode::bidi_calc_types {bidi_calc_types(const std::u32string & string);
std::vector<unicode_bidi_type_t> types ;
void setbnl(std::u32string & string);
};
std::tuple<std::vector<unicode_bidi_level_t>,
struct unicode_bidi_direction>
unicode::bidi_calc( |
const unicode::bidi_calc_types &ustring) ; |
std::tuple<std::vector<unicode_bidi_level_t>,
struct unicode_bidi_direction>
unicode::bidi_calc( |
const unicode::bidi_calc_types &ustring, |
unicode_bidi_level_t embedding_level) ; |
int
unicode::bidi_reorder( |
std::u32string &string, |
std::vector<unicode_bidi_level_t> &embedding_level, | |
const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, | |
size_t starting_pos=0, | |
size_t n=(size_t)-1) ; |
void
unicode::bidi_reorder( |
std::vector<unicode_bidi_level_t> &embedding_level, |
const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, | |
size_t starting_pos=0, | |
size_t n=(size_t)-1) ; |
void
unicode::bidi_cleanup( |
std::u32string &string, |
const std::function<void (size_t)> &removed_callback=[](size_t){}, | |
int cleanup_options=0) ; |
int
unicode::bidi_cleanup( |
std::u32string &string, |
std::vector <unicode_bidi_level_t> &levels, | |
const std::function<void (size_t)> &removed_callback=[](size_t){}, | |
int cleanup_options=0) ; |
int
unicode::bidi_cleanup( |
std::u32string &string, |
std::vector <unicode_bidi_level_t> &levels, | |
const std::function<void (size_t)> &removed_callback, | |
int cleanup_options, | |
size_t starting_pos, | |
size_t n) ; |
int
unicode::bidi_logical_order( |
std::u32string &string, |
std::vector <unicode_bidi_level_t> &levels, | |
unicode_bidi_level_t paragraph_embedding, | |
const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, | |
size_t starting_pos=0, | |
size_t n=(size_t)-1) ; |
void
unicode::bidi_combinings( |
const std::u32string &string, |
const std::vector <unicode_bidi_level_t> &levels, | |
const std::function <void (unicode_bidi_level_t
level, size_t level_start, size_t n_chars, size_t
comb_start, size_t n_comb_chars)> &callback) ; |
void
unicode::bidi_combinings( |
const std::u32string &string, |
const std::function <void (unicode_bidi_level_t
level, size_t level_start, size_t n_chars, size_t
comb_start, size_t n_comb_chars)> &callback) ; |
void
unicode::bidi_logical_order( |
std::vector <unicode_bidi_level_t> &levels, |
unicode_bidi_level_t paragraph_embedding, | |
const std::function<void (size_t, size_t)> &reorder_callback, | |
size_t starting_pos=0, | |
size_t n=(size_t)-1) ; |
bool
unicode::bidi_needs_embed( |
const std::u32string &string, |
const std::vector <unicode_bidi_level_t> &levels, | |
const unicode_bidi_level_t *paragraph_embedding=NULL, | |
size_t starting_pos=0, | |
size_t n=(size_t)-1) ; |
int
unicode::bidi_embed( |
const std::u32string &string, |
const std::vector <unicode_bidi_level_t> &levels, | |
unicode_bidi_level_t paragraph_embedding, | |
const std::function<void (const char32_t *,
size_t, bool)> &callback) ; |
std::u32string
unicode::bidi_embed( |
const std::u32string &string, |
const std::vector <unicode_bidi_level_t> &levels, | |
unicode_bidi_level_t paragraph_embedding) ; |
char32_t
unicode::bidi_embed_paragraph_level( |
const std::u32string &string, |
unicode_bidi_level_t paragraph_embedding) ; |
unicode_bidi_direction
unicode::bidi_get_direction( |
const std::u32string &string, |
size_t starting_pos=0, | |
size_t n=(size_t)-1) ; |
std::u32string
unicode::bidi_override( |
const std::u32string &string, |
unicode_bidi_level_t direction, | |
int cleanup_options=0) ; |
These functions implement the C++ interface for the Unicode Bi-Directional algorithm. See the description of the underlying unicode_bidi(3) C library API for more information. C++ specific notes:
unicode::bidi_calc
returns the directional embedding value buffer and the
calculated paragraph embedding level. Its ustring
is implicitly
converted from a std::u32string
:
std::u32string text;
auto [levels, direction]=unicode::bidi_calc(text);
Alternatively a unicode::bidi_calc_types
object
gets constructed from the same std::u32string
and then passed
directly to unicode::bidi_calc
:
std::u32string text;
unicode::bidi_calc_types types{text};
types.setbnl(text); // Optional
// types.types is a std::vector of unicode_bidi_type_t values
auto [levels, direction]=unicode::bidi_calc(types);
This provides the means to access the intermediate
unicode_bidi_type_t
values
that get calculated from the Unicode text string.
In all cases the std::u32string
cannot be a
temporary object, and it must remain in scope until
unicode::bidi_calc
()
returns.
The optional setbnl
() method uses unicode_bidi_setbnl(3)
to replace paragraph separators with newline
characters, in the unicode string. It requires the same
unicode string that was passed to the constructor as a
parameter (because the constructor takes a constant
reference, but this method modifies the string).
Several C functions provide a “dry-run”
mode by passing a NULL
pointer. The C++ API provides separate overloads, with
and without the nullable parameter.
Several C functions accept a nullable function
pointer, with the NULL
function pointer specifying no callback. The C++
functions have a std::function
parameter with a
default do-nothing closure.
Several C functions accept two parameters, a Unicode
character pointer and the embedding level buffer, and a
single parameter that specifies the size of both. The
equivalent C++ function takes two discrete parameters,
a std::u32string
and a
std::vector
and returns
an int
; a negative value
if their sizes differ, and 0 if their sizes match, and
the requested function completes. The unicode::bidi_embed
overload that
returns a std::u32string
returns an empty string in case of a mismatch.
unicode::bidi_reorder
reorders the entire string
and its embedding_level
s by
default. The optional starting_pos
and
n
parameters
limit the reordering to the indicated subset of the
original string (specified as the starting position
offset index, and the number of characters).
unicode::bidi_reorder
,
unicode::bidi_cleanup
,
unicode::bidi_logical_order
,
unicode::bidi_needs_embed
and unicode::bidi_get_direction
take two
optional parameters (defaulted values or overloaded)
specifying an optional starting position and number of
characters that define a subset of the original string
that gets reordered, cleaned up, or has its direction
determined.
The unicode::bidi_cleanup
overload that takes a starting position and a character count does not trim
off the passed in string and embedding level buffer,
since it affects only a subset of the string. The
number of times the removed character callback gets
invoked indicates how much the substring should be
trimmed off.
unicode::bidi_override
returns a copy of the passed-in string, modified
as follows:
unicode::bidi_cleanup
() is
applied with the specified, or defaulted,
cleanup_options
Either an LRO
or
an RLO
override
marker gets prepended to the Unicode string,
forcing the entire string to be interpreted in a
single rendering direction, when processed by the
Unicode bi-directional algorithm.
unicode::bidi_override
makes it possible to use a Unicode-aware application or
algorithm in a context that only works with text that's
always displayed in a fixed direction, allowing
graceful handling of input containing bi-directional
text.
unicode::literals
namespace

using namespace unicode::literals;

std::u32string foo(std::u32string bar)
{
    return bar + LRO;
}
This namespace contains the following constexpr
definitions:
char32_t
arrays
with literal Unicode character strings containing
Unicode directional, isolate, and override markers,
like LRO
, RLO
and others.
CLEANUP_EXTRA
,
CLEANUP_BNL
, and
CLEANUP_CANONICAL
options for unicode::bidi_cleanup
().