Commit 346e50ca authored by Luis Javier Merino's avatar Luis Javier Merino Committed by Tomaz Canabrava
Browse files

Improve support for isolated Hangul Jamo

Korean Hangul can be represented in Unicode either as precomposed Hangul
syllables, or as sequences of alphabetic components called Jamo.

Jamo at U+1100..U+11FF, U+A960..U+A97F, U+D7B0..U+D7FF have conjoining
behavior, a sequence of Jamo for Leading Consonants, Vowels, and
(optionally) Trailing Consonants, in that order, conjoins into a
syllable block, (i.e. a sequence L+V+T*, where + and * have their
typical regex meaning).

To represent a Jamo in isolation, there are several options:

 - Use non-conjoining Jamo from U+3130..U+318F (or the halfwidth forms
   at U+FFA0..U+FFDF).  These blocks cover all Jamo used in modern
   Korean, but not all Jamo used in Old Korean.
 - Use the fillers at U+115F and U+1160 to substitute for non-existing
   Leading Consonants, and Vowels.  Fonts may stretch Jamo into a
   corner.
 - Use Jamo from U+1100..U+11FF, U+A960..U+A97F, U+D7B0..U+D7FF, and
   separate them with non-Korean characters.  Unicode recommends U+200B
   ZERO WID...
parent c26693af
Pipeline #144382 passed with stage
in 2 minutes and 18 seconds
......@@ -925,6 +925,7 @@ void Screen::displayCharacter(uint c)
// Find previous "real character" to try to combine with
int charToCombineWithX = qMin(_cuX, _screenLines.at(_cuY).length());
int charToCombineWithY = _cuY;
bool previousChar = true;
do {
if (charToCombineWithX > 0) {
--charToCombineWithX;
......@@ -933,16 +934,33 @@ void Screen::displayCharacter(uint c)
charToCombineWithX = _screenLines.at(charToCombineWithY).length() - 1;
} else {
// Give up
return;
previousChar = false;
break;
}
// Failsafe
if (charToCombineWithX < 0) {
return;
previousChar = false;
break;
}
} while (_screenLines.at(charToCombineWithY).at(charToCombineWithX).isRightHalfOfDoubleWide());
if (!previousChar) {
if (!Hangul::isHangul(c)) {
return;
} else {
w = 2;
goto notcombine;
}
}
Character &currentChar = _screenLines[charToCombineWithY][charToCombineWithX];
if (Hangul::isHangul(c) && !Hangul::combinesWith(currentChar, c)) {
w = 2;
goto notcombine;
}
if ((currentChar.rendition & RE_EXTENDED_CHAR) == 0) {
const uint chars[2] = {currentChar.character, c};
currentChar.rendition |= RE_EXTENDED_CHAR;
......@@ -954,7 +972,7 @@ void Screen::displayCharacter(uint c)
ushort extendedCharLength;
const uint *oldChars = ExtendedCharTable::instance.lookupExtendedChar(currentChar.character, extendedCharLength);
Q_ASSERT(oldChars);
if (((oldChars) != nullptr) && extendedCharLength < 3) {
if (((oldChars) != nullptr) && extendedCharLength < 8) {
Q_ASSERT(extendedCharLength > 1);
Q_ASSERT(extendedCharLength < 65535); // redundant due to above check
auto chars = std::make_unique<uint[]>(extendedCharLength + 1);
......@@ -969,6 +987,7 @@ void Screen::displayCharacter(uint c)
return;
}
notcombine:
if (_cuX + w > getScreenLineColumns(_cuY)) {
if (getMode(MODE_Wrap)) {
_lineProperties[_cuY] = static_cast<LineProperty>(_lineProperties.at(_cuY) | LINE_WRAPPED);
......
......@@ -2,6 +2,7 @@
set(konsolecharacters_SRCS
CharacterFormat.cpp
CharacterWidth.cpp
Hangul.cpp
LineBlockCharacters.cpp
ExtendedCharTable.cpp
)
......
......@@ -12,6 +12,7 @@
#include "CharacterColor.h"
#include "CharacterWidth.h"
#include "ExtendedCharTable.h"
#include "Hangul.h"
#include "LineBlockCharacters.h"
// Qt
......@@ -160,8 +161,17 @@ public:
/**
 * Sums the display width of the @p len code points starting at @p ucs4Str.
 *
 * Non-Hangul code points contribute width(c) each. Hangul code points are
 * routed through Hangul::width(), which tracks the position inside the
 * current syllable block so that conjoining Jamo add no width of their own
 * and isolated Jamo are counted as two cells.
 *
 * Each code point must be counted exactly once: the unconditional
 * `w += width(ucs4Str[i])` of the pre-Hangul implementation must not be kept
 * next to the branch below, or every character is counted twice.
 */
static int stringWidth(const uint *ucs4Str, int len)
{
    int w = 0;
    Hangul::SyllablePos hangulSyllablePos = Hangul::NotInSyllable;

    for (int i = 0; i < len; ++i) {
        const uint c = ucs4Str[i];

        if (!Hangul::isHangul(c)) {
            w += width(c);
            // Any non-Hangul code point terminates the current syllable block.
            hangulSyllablePos = Hangul::NotInSyllable;
        } else {
            w += Hangul::width(c, width(c), hangulSyllablePos);
        }
    }
    return w;
}
......
/*
SPDX-FileCopyrightText: 2022 Luis Javier Merino Morán <ninjalj@gmail.com>
SPDX-License-Identifier: GPL-2.0-or-later
*/
// Own
#include "Hangul.h"
// Qt
#include <QtGlobal>
// Konsole
#include "Character.h"
namespace Konsole
{
/**
 * Advances the syllable state machine by one code point and reports how much
 * width that code point adds.
 *
 * @param c              code point to process
 * @param widthFromTable width to charge when @p c opens a new syllable block
 * @param syllablePos    in/out: position inside the current syllable block
 * @return @p widthFromTable when @p c starts a new syllable, 2 when @p c is a
 *         Jamo that cannot continue the current block (isolated Jamo), and 0
 *         when @p c conjoins with the block or is not Hangul at all.
 */
int Hangul::width(uint c, int widthFromTable, enum Hangul::SyllablePos &syllablePos)
{
    const SyllableType kind = jamoType(c);

    // Opens a new syllable block at position `next`.
    const auto beginSyllable = [&syllablePos, widthFromTable](SyllablePos next) {
        syllablePos = next;
        return widthFromTable;
    };
    // A Jamo that is invalid at this point: shown on its own, two cells wide.
    const auto isolatedJamo = [&syllablePos]() {
        syllablePos = NotInSyllable;
        return 2;
    };

    switch (syllablePos) {
    case NotInSyllable:
        switch (kind) {
        case Leading_Jamo:
            return beginSyllable(AtLeadingJamo);
        case LV_Syllable:
            return beginSyllable(AtVowelJamo);
        case LVT_Syllable:
            return beginSyllable(AtTrailingJamo);
        case Vowel_Jamo:
        case Trailing_Jamo:
            // no leading consonant to attach to
            return isolatedJamo();
        default:
            return 0;
        }

    case AtLeadingJamo:
        switch (kind) {
        case Leading_Jamo:
            // conjoins with the previous leading consonant(s)
            return 0;
        case LV_Syllable:
            return beginSyllable(AtVowelJamo);
        case LVT_Syllable:
            return beginSyllable(AtTrailingJamo);
        case Vowel_Jamo:
            // conjoins, the block now has its vowel
            syllablePos = AtVowelJamo;
            return 0;
        case Trailing_Jamo:
            // a trailing consonant cannot directly follow a leading one
            return isolatedJamo();
        default:
            return 0;
        }

    case AtVowelJamo:
        switch (kind) {
        case Leading_Jamo:
            return beginSyllable(AtLeadingJamo);
        case LV_Syllable:
            return beginSyllable(AtVowelJamo);
        case LVT_Syllable:
            return beginSyllable(AtTrailingJamo);
        case Vowel_Jamo:
            // conjoins with the previous vowel(s)
            return 0;
        case Trailing_Jamo:
            // conjoins, the block now has a trailing consonant
            syllablePos = AtTrailingJamo;
            return 0;
        default:
            return 0;
        }

    case AtTrailingJamo:
        switch (kind) {
        case Leading_Jamo:
            return beginSyllable(AtLeadingJamo);
        case LV_Syllable:
            return beginSyllable(AtVowelJamo);
        case LVT_Syllable:
            return beginSyllable(AtTrailingJamo);
        case Vowel_Jamo:
            // a vowel cannot follow a trailing consonant
            return isolatedJamo();
        case Trailing_Jamo:
            // conjoins with the previous trailing consonant(s)
            return 0;
        default:
            return 0;
        }
    }
    return 0;
}
/**
 * Feeds @p c into the syllable state machine purely for its effect on
 * @p syllablePos; the width result is of no interest here.
 *
 * A non-Hangul code point resets the state to NotInSyllable.
 */
void Hangul::updateHangulSyllablePos(Hangul::SyllablePos &syllablePos, uint c)
{
    if (isHangul(c)) {
        // widthFromTable is irrelevant, only the state transition matters
        static_cast<void>(Hangul::width(c, 0, syllablePos));
        return;
    }
    // width() has no transition for non-Hangul input, so resetting is all
    // that is needed.
    syllablePos = NotInSyllable;
}
/**
 * Tells whether @p c may be appended to a syllable block that currently ends
 * at @p syllablePos.
 *
 * @return true when @p c continues the block, false otherwise (always false
 *         when not inside a syllable).
 */
bool Hangul::validSyllableContinuation(Hangul::SyllablePos syllablePos, uint c)
{
    const SyllableType kind = jamoType(c);

    if (syllablePos == AtLeadingJamo) {
        // anything Hangul except a trailing consonant may follow
        return !(kind == Trailing_Jamo || kind == Not_Applicable);
    }
    if (syllablePos == AtVowelJamo) {
        return kind == Vowel_Jamo || kind == Trailing_Jamo;
    }
    if (syllablePos == AtTrailingJamo) {
        return kind == Trailing_Jamo;
    }
    return false;
}
/**
 * Decides whether code point @p c conjoins with the syllable already stored
 * in @p prevChar.
 *
 * The syllable position is reconstructed by replaying the code point(s) of
 * @p prevChar (a single one, or the whole sequence from the extended
 * character table when RE_EXTENDED_CHAR is set) through the state machine,
 * then @p c is checked against that position.
 *
 * @return true when @p c is a valid continuation; false otherwise, including
 *         when the extended character lookup fails.
 */
bool Hangul::combinesWith(Character prevChar, uint c)
{
    Hangul::SyllablePos pos = Hangul::NotInSyllable;

    const bool isExtended = (prevChar.rendition & RE_EXTENDED_CHAR) != 0;
    if (!isExtended) {
        // plain cell: exactly one code point to replay
        updateHangulSyllablePos(pos, prevChar.character);
        return validSyllableContinuation(pos, c);
    }

    ushort sequenceLength;
    const uint *const sequence = ExtendedCharTable::instance.lookupExtendedChar(prevChar.character, sequenceLength);
    if (sequence == nullptr) {
        return false;
    }

    const uint *const sequenceEnd = sequence + sequenceLength;
    for (const uint *it = sequence; it != sequenceEnd; ++it) {
        updateHangulSyllablePos(pos, *it);
    }
    return validSyllableContinuation(pos, c);
}
}
/*
SPDX-FileCopyrightText: 2022 Luis Javier Merino Morán <ninjalj@gmail.com>
SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef HANGUL_H
#define HANGUL_H
#include <QtGlobal>
namespace Konsole
{
class Character;

/**
 * Helpers for Korean Hangul.
 *
 * Classifies code points as conjoining Jamo (leading consonant, vowel,
 * trailing consonant) or precomposed syllables (LV, LVT), and tracks the
 * position inside a syllable block while a sequence of code points is
 * processed.
 */
class Hangul
{
public:
    // See HangulSyllableType.txt from the Unicode data distribution
    enum SyllableType {
        Not_Applicable,
        Leading_Jamo,
        Vowel_Jamo,
        Trailing_Jamo,
        LV_Syllable,
        LVT_Syllable,
    };

    // Position reached inside the syllable block currently being built.
    enum SyllablePos {
        NotInSyllable,
        AtLeadingJamo,
        AtVowelJamo,
        AtTrailingJamo,
    };

    /**
     * Processes code point @p c at position @p syllablePos, updates
     * @p syllablePos, and returns the width @p c contributes.
     * @p widthFromTable is the width charged when @p c starts a new syllable.
     */
    static int width(uint c, int widthFromTable, enum SyllablePos &syllablePos);

    /** Returns whether @p c conjoins with the syllable held by @p prev. */
    static bool combinesWith(Character prev, uint c);

    /**
     * True for conjoining Jamo (U+1100..U+11FF, U+A960..U+A97F,
     * U+D7B0..U+D7FF) and precomposed syllables (U+AC00..U+D7A3).
     */
    static bool isHangul(const uint c)
    {
        return (c >= 0x1100 && c <= 0x11ff) || (c >= 0xa960 && c <= 0xa97f) || (c >= 0xd7b0 && c <= 0xd7ff) || isPrecomposedSyllable(c);
    }

private:
    static bool isLeadingJamo(const uint c)
    {
        return (c >= 0x1100 && c <= 0x115f) || (c >= 0xa960 && c <= 0xa97f);
    }

    static bool isVowelJamo(const uint c)
    {
        return (c >= 0x1160 && c <= 0x11a7) || (c >= 0xd7b0 && c <= 0xd7c6);
    }

    static bool isTrailingJamo(const uint c)
    {
        return (c >= 0x11a8 && c <= 0x11ff) || (c >= 0xd7cb && c <= 0xd7ff);
    }

    // Precomposed Hangul syllables block, U+AC00..U+D7A3.
    static bool isPrecomposedSyllable(const uint c)
    {
        return c >= 0xac00 && c <= 0xd7a3;
    }

    // Precomposed syllables come in groups of 28 (0x1c) starting at U+AC00:
    // the first entry of each group has no trailing consonant (LV), the
    // remaining 27 have one (LVT).  The index must be taken relative to
    // U+AC00, because 0xAC00 itself is not a multiple of 28.
    static bool isLvSyllable(const uint c)
    {
        return isPrecomposedSyllable(c) && ((c - 0xac00) % 0x1c == 0);
    }

    static bool isLvtSyllable(const uint c)
    {
        return isPrecomposedSyllable(c) && ((c - 0xac00) % 0x1c != 0);
    }

    // Maps a code point to its Hangul syllable type; the ranges tested by
    // the predicates are disjoint, so at most one of them matches.
    static SyllableType jamoType(const uint c)
    {
        // clang-format off
        if (isLeadingJamo(c)) return Leading_Jamo;
        if (isVowelJamo(c)) return Vowel_Jamo;
        if (isTrailingJamo(c)) return Trailing_Jamo;
        if (isLvSyllable(c)) return LV_Syllable;
        if (isLvtSyllable(c)) return LVT_Syllable;
        return Not_Applicable;
        // clang-format on
    }

    // Advances syllablePos by code point c; non-Hangul input resets it.
    static void updateHangulSyllablePos(Hangul::SyllablePos &syllablePos, uint c);

    // Whether c may follow a syllable block that currently ends at syllablePos.
    static bool validSyllableContinuation(Hangul::SyllablePos syllablePos, uint c);
};
}
#endif // HANGUL_H
......@@ -131,7 +131,7 @@ void PlainTextDecoder::decodeLine(const Character *const characters, int count,
// of `dialog --infobox "qwe" 10 10` .
if (characters[i].isRealCharacter || i <= realCharacterGuard) {
characterBuffer.append(characters[i].character);
i += qMax(1, characters[i].width());
i += qMax(1, Character::stringWidth(&characters[i].character, 1));
} else {
++i; // should we 'break' directly here?
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment