vt: properly support zero-width Unicode code points

Zero-width Unicode code points are causing misalignment in vertically
aligned content, disrupting the visual layout. Let's handle zero-width
code points more intelligently.

Double-width code points are stored in the screen grid followed by a white
space code point to create the expected screen layout. When a double-width
code point is followed by a zero-width code point in the console incoming
bytestream (e.g., an emoji with a presentation selector) then we may
replace the white space padding by that zero-width code point instead of
dropping it. This maximizes the information kept on screen while preserving
proper layout.

If a zero-width code point is preceded by a single-width code point then
the above trick is not possible and such a zero-width code point must
be dropped.

VS16 (Variation Selector 16, U+FE0F) is special as it doubles the width
of the preceding single-width code point. We handle that case by giving
VS16 a width of 1 when that happens.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Link: https://lore.kernel.org/r/20250410011839.64418-4-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Nicolas Pitre 2025-04-09 21:13:55 -04:00 committed by Greg Kroah-Hartman
parent 2acaf27cd7
commit e88391f730
2 changed files with 54 additions and 2 deletions

View File

@ -443,6 +443,15 @@ static void vc_uniscr_scroll(struct vc_data *vc, unsigned int top,
}
}
/*
 * Read a code point from the Unicode screen buffer on the cursor's row.
 *
 * @vc:           console to read from
 * @relative_pos: column offset relative to the current position
 *                (negative values look at cells before the cursor)
 *
 * The column looked up is state.x + vc_need_wrap + relative_pos, so the
 * pending-wrap flag shifts the reference column by one.
 *
 * Returns the stored code point, or 0 when the console has no Unicode
 * screen buffer or the resulting column falls outside [0, vc_cols).
 */
static u32 vc_uniscr_getc(struct vc_data *vc, int relative_pos)
{
	int col;

	if (!vc->vc_uni_lines)
		return 0;

	col = vc->state.x + vc->vc_need_wrap + relative_pos;
	if (col < 0 || col >= vc->vc_cols)
		return 0;

	return vc->vc_uni_lines[vc->state.y][col];
}
static void vc_uniscr_copy_area(u32 **dst_lines,
unsigned int dst_cols,
unsigned int dst_rows,
@ -2905,18 +2914,49 @@ static bool vc_is_control(struct vc_data *vc, int tc, int c)
return false;
}
/*
 * Step the output position back by one cell so that the next character
 * written lands on the previously written cell.
 *
 * When a wrap is pending, only the pending-wrap flag is cleared and the
 * position is left alone. Otherwise, unless already at column 0, the
 * column is decremented and vc_pos is moved back by 2 (presumably the
 * size of one screen cell -- confirm against the screen buffer layout).
 * In every case vc_need_wrap is 0 on return.
 */
static void vc_con_rewind(struct vc_data *vc)
{
	if (vc->vc_need_wrap) {
		vc->vc_need_wrap = 0;
		return;
	}

	if (vc->state.x != 0) {
		vc->state.x--;
		vc->vc_pos -= 2;
	}
}
static int vc_con_write_normal(struct vc_data *vc, int tc, int c,
struct vc_draw_region *draw)
{
int next_c;
int next_c, prev_c;
unsigned char vc_attr = vc->vc_attr;
u16 himask = vc->vc_hi_font_mask, charmask = himask ? 0x1ff : 0xff;
u8 width = 1;
bool inverse = false;
if (vc->vc_utf && !vc->vc_disp_ctrl) {
if (ucs_is_double_width(c))
if (ucs_is_double_width(c)) {
width = 2;
} else if (ucs_is_zero_width(c)) {
prev_c = vc_uniscr_getc(vc, -1);
if (prev_c == ' ' &&
ucs_is_double_width(vc_uniscr_getc(vc, -2))) {
/*
* Let's merge this zero-width code point with
* the preceding double-width code point by
* replacing the existing whitespace padding.
*/
vc_con_rewind(vc);
} else if (c == 0xfe0f && prev_c != 0) {
/*
* VS16 (U+FE0F) is special. Let it have a
* width of 1 when preceded by a single-width
* code point, effectively making the latter
* double-width.
*/
} else {
/* Otherwise zero-width code points are ignored */
goto out;
}
}
}
/* Now try to find out how to display it */
@ -2995,6 +3035,8 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c,
tc = ' ';
next_c = ' ';
}
out:
notify_write(vc, c);
if (inverse)

View File

@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c);
int conv_uni_to_8bit(u32 uni);
void console_map_init(void);
bool ucs_is_double_width(uint32_t cp);
/*
 * Tell whether a Unicode code point occupies no column on screen.
 *
 * @cp: Unicode code point to test
 *
 * Placeholder for now: the real lookup is not implemented yet, so no
 * code point is ever reported as zero-width.
 */
static inline bool ucs_is_zero_width(uint32_t cp)
{
	/* coming soon: always "not zero-width" until the lookup exists */
	return false;
}
#else
static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
bool use_unicode)
@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp)
{
return false;
}
/*
 * Fallback stub for the #else branch of the CONFIG_CONSOLE_TRANSLATIONS
 * conditional: no code point is ever reported as zero-width.
 *
 * @cp: Unicode code point to test (unused)
 */
static inline bool ucs_is_zero_width(uint32_t cp)
{
	return false;
}
#endif /* CONFIG_CONSOLE_TRANSLATIONS */
#endif /* __LINUX_CONSOLEMAP_H__ */