Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
PIM
KItinerary
Commits
ddd9c3e3
Commit
ddd9c3e3
authored
Sep 22, 2022
by
Volker Krause
Browse files
Move transliteration method to StringUtil
For re-use in person name matching.
parent
903ea188
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/lib/locationutil.cpp
View file @
ddd9c3e3
...
...
@@ -6,6 +6,7 @@
#include
"locationutil.h"
#include
"locationutil_p.h"
#include
"stringutil.h"
#include
<KItinerary/BoatTrip>
#include
<KItinerary/BusTrip>
...
...
@@ -189,41 +190,6 @@ static QString stripDiacritics(const QString &s)
return
res
;
}
// Characters that are replaced by an ASCII digraph instead of being reduced by Unicode decomposition.
// Keep the entries sorted by ascending code point, the table is searched with std::lower_bound
// (see https://en.wikipedia.org/wiki/List_of_Unicode_characters for the ordering).
static const struct {
    ushort key;              // UTF-16 code unit of the character to replace
    const char *replacement; // replacement text, decoded via QString::fromUtf8 by the lookup code
} transliteration_map[] = {
    { u'ä', "ae" },
    { u'ö', "oe" },
    { u'ø', "oe" },
    { u'ü', "ue" }
};
/** Returns a copy of @p s with special characters transliterated.
 *
 *  Each character is handled by the first rule that applies:
 *  - characters listed in transliteration_map are replaced by their mapped string
 *  - characters with a canonical Unicode decomposition are replaced by the first
 *    character of that decomposition
 *  - everything else is copied unchanged
 */
static QString applyTransliterations(const QString &s)
{
    const auto mapBegin = std::begin(transliteration_map);
    const auto mapEnd = std::end(transliteration_map);
    // orders table entries against the character we are looking for
    const auto keyLess = [](const auto &entry, const auto ch) {
        return QChar(entry.key) < ch;
    };

    QString result;
    result.reserve(s.size());
    for (const auto ch : s) {
        // binary search, relies on transliteration_map being sorted by key
        const auto entry = std::lower_bound(mapBegin, mapEnd, ch, keyLess);
        if (entry != mapEnd && QChar(entry->key) == ch) {
            result += QString::fromUtf8(entry->replacement);
        } else if (ch.decompositionTag() == QChar::Canonical) {
            // only the leading character of the decomposition is kept
            result += ch.decomposition().at(0);
        } else {
            result += ch;
        }
    }
    return result;
}
static
bool
compareSpaceCaseInsenstive
(
const
QString
&
lhs
,
const
QString
&
rhs
)
{
auto
lit
=
lhs
.
begin
();
...
...
@@ -262,8 +228,8 @@ static bool isSameLocationName(const QString &lhs, const QString &rhs, LocationU
// check if any of the Unicode normalization approaches helps
const
auto
lhsNormalized
=
stripDiacritics
(
lhs
);
const
auto
rhsNormalized
=
stripDiacritics
(
rhs
);
const
auto
lhsTransliterated
=
applyT
ransliterat
ions
(
lhs
);
const
auto
rhsTransliterated
=
applyT
ransliterat
ions
(
rhs
);
const
auto
lhsTransliterated
=
StringUtil
::
t
ransliterat
e
(
lhs
);
const
auto
rhsTransliterated
=
StringUtil
::
t
ransliterat
e
(
rhs
);
if
(
compareSpaceCaseInsenstive
(
lhsNormalized
,
rhsNormalized
)
||
compareSpaceCaseInsenstive
(
lhsNormalized
,
rhsTransliterated
)
||
compareSpaceCaseInsenstive
(
lhsTransliterated
,
rhsNormalized
)
||
compareSpaceCaseInsenstive
(
lhsTransliterated
,
rhsTransliterated
))
{
return
true
;
...
...
src/lib/stringutil.cpp
View file @
ddd9c3e3
...
...
@@ -118,3 +118,38 @@ QString StringUtil::clean(const QString &s)
{
return
KCharsets
::
resolveEntities
(
s
).
simplified
();
}
// Characters that are replaced by an ASCII digraph instead of being reduced by Unicode decomposition.
// Keep the entries sorted by ascending code point, the table is searched with std::lower_bound
// (see https://en.wikipedia.org/wiki/List_of_Unicode_characters for the ordering).
static const struct {
    ushort key;              // UTF-16 code unit of the character to replace
    const char *replacement; // replacement text, decoded via QString::fromUtf8 by the lookup code
} transliteration_map[] = {
    { u'ä', "ae" },
    { u'ö', "oe" },
    { u'ø', "oe" },
    { u'ü', "ue" }
};
/** Returns a copy of @p s with diacritics and other special characters transliterated.
 *
 *  Each character is handled by the first rule that applies:
 *  - characters listed in transliteration_map are replaced by their mapped string
 *  - characters with a canonical Unicode decomposition are replaced by the first
 *    character of that decomposition
 *  - everything else is copied unchanged
 */
QString StringUtil::transliterate(QStringView s)
{
    const auto mapBegin = std::begin(transliteration_map);
    const auto mapEnd = std::end(transliteration_map);
    // orders table entries against the character we are looking for
    const auto keyLess = [](const auto &entry, const auto ch) {
        return QChar(entry.key) < ch;
    };

    QString result;
    result.reserve(s.size());
    for (const auto ch : s) {
        // binary search, relies on transliteration_map being sorted by key
        const auto entry = std::lower_bound(mapBegin, mapEnd, ch, keyLess);
        if (entry != mapEnd && QChar(entry->key) == ch) {
            result += QString::fromUtf8(entry->replacement);
        } else if (ch.decompositionTag() == QChar::Canonical) {
            // only the leading character of the decomposition is kept
            result += ch.decomposition().at(0);
        } else {
            result += ch;
        }
    }
    return result;
}
src/lib/stringutil.h
View file @
ddd9c3e3
...
...
@@ -29,6 +29,9 @@ namespace StringUtil
/** Cleans up extra white spaces and XML entities from @p s. */
QString
clean
(
const
QString
&
s
);
/** Transliterate diacritics or other special characters. */
QString
transliterate
(
QStringView
s
);
}
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment