Commit 1de2c484 authored by Harald Sitter's avatar Harald Sitter 🏳️‍🌈
Browse files

refactor deleted file detection

Instead of using procfs' /map_files, use /maps. This will allow us to
also process data after the process has gone away (e.g. when processing
data from coredumpd) and makes no difference for the running process.

the parser is now in its own file and has a unit test

There's also new tech in place that adds stat support. This is also to
aid future development of coredumpd support: if a library has changed
in the time between the core being dumped and drkonqi starting, then we
need a way to figure out whether the file has changed even when we
can't rely on the ' (deleted)' marker in the /maps data.
The easiest way to achieve this is to compare the inodes. While that
isn't always accurate, it's probably accurate enough for our purposes.
parent c57a22ba
......@@ -38,6 +38,7 @@ set(drkonqi_SRCS
debuggermanager.cpp
applicationdetailsexamples.cpp
statusnotifier.cpp
linuxprocmapsparser.cpp
)
ki18n_wrap_ui(drkonqi_SRCS
......
......@@ -25,6 +25,7 @@
#include "debugger.h"
#include "debuggermanager.h"
#include "drkonqi.h"
#include "linuxprocmapsparser.h"
#ifdef Q_OS_MACOS
#include <AvailabilityMacros.h>
......@@ -123,37 +124,12 @@ CrashedApplication *KCrashBackend::constructCrashedApplication()
fakeBaseName = DrKonqi::appName();
}
QDir mapFilesDir(procPath + QStringLiteral("/map_files"));
mapFilesDir.setFilter(mapFilesDir.filter() | QDir::System); // proc is system!
// "/bin/foo (deleted)" is how the kernel tells us that a file has been deleted since
// it was mmap'd.
QRegularExpression expression(QStringLiteral("(?<path>.+) \\(deleted\\)$"));
// For the map_files we filter only .so files to ensure that
// we don't trip over cache files or the like, as a result we
// manually need to check if the main exe was deleted and add
// it.
// NB: includes .so* and .py* since we also implicitly support snakes to
// a degree
QRegularExpression soExpression(QStringLiteral("(?<path>.+\\.(so|py)([^/]*)) \\(deleted\\)$"));
const auto exeMatch = expression.match(exePath);
if (exeMatch.isValid() && exeMatch.hasMatch()) {
hasDeletedFiles = true;
}
const auto list = mapFilesDir.entryInfoList();
for (auto it = list.constBegin(); !hasDeletedFiles && it != list.constEnd(); ++it) {
const auto match = soExpression.match(it->symLinkTarget());
if (!match.isValid() || !match.hasMatch()) {
continue;
}
const QString path = match.captured(QStringLiteral("path"));
if (path.startsWith(QStringLiteral("/memfd"))) {
// Qml.so's JIT shows up under memfd. This is a false positive against our regex.
continue;
}
hasDeletedFiles = true;
const QString mapsPath = procPath + QStringLiteral("/maps");
QFile mapsFile(mapsPath);
if (mapsFile.open(QFile::ReadOnly)) {
hasDeletedFiles = LinuxProc::hasMapsDeletedFiles(exePath, mapsFile.readAll(), LinuxProc::Check::DeletedMarker);
} else {
qCWarning(DRKONQI_LOG) << "failed to open maps file" << mapsPath;
}
qCDebug(DRKONQI_LOG) << "exe" << exePath << "has deleted files:" << hasDeletedFiles;
......
/*
SPDX-License-Identifier: GPL-2.0-or-later
SPDX-FileCopyrightText: 2021 Harald Sitter <sitter@kde.org>
*/
#include "linuxprocmapsparser.h"
#include <cerrno>
#include <cstring>
#include <QByteArray>
#include <QByteArrayList>
#include <QDebug>
#include <QFile>
#include <QRegularExpression>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "drkonqi_debug.h"
bool LinuxProc::isLibraryPath(const QString &path)
{
// This intentionally matches potential suffixes, i.e. "/usr/lib/foo.so.0" but also "foo.so (deleted)"
static QRegularExpression soExpression(QStringLiteral("(?<path>.+\\.(so|py)([^/]*))$"));
const auto soMatch = soExpression.match(path);
return soMatch.isValid() && soMatch.hasMatch() && !soMatch.captured(u"path").isEmpty();
}
bool LinuxProc::hasMapsDeletedFiles(const QString &exePathString, const QByteArray &maps, Check check)
{
const QByteArray exePath = QFile::encodeName(exePathString);
const QByteArrayList lines = maps.split('\n');
for (const auto &line : lines) {
if (line.isEmpty()) {
continue;
}
// Walk string by tokens. This is by far the easiest way to parse the format as anything after
// the first 5 fields (minus the tokens) is the pathname. The pathname may be nothing, or contain more
// spaces in turn. Qt has no convenient API for this, use strtok.
QByteArray mutableLine = line;
// address
strtok(mutableLine.data(), " ");
// perms
strtok(nullptr, " ");
// offset
strtok(nullptr, " ");
// dev
strtok(nullptr, " ");
// inode
const QByteArray inode(strtok(nullptr, " "));
// remainder is the pathname
const QByteArray pathname = QByteArray(strtok(nullptr, "\n")).simplified(); // simplify to make evaluation easier
if (pathname.isEmpty() || pathname.at(0) != QLatin1Char('/')) {
// Could be pseudo entry like [heap] or anonymous region.
continue;
}
if (pathname.startsWith(QByteArrayLiteral("/memfd"))) {
// Qml.so's JIT shows up under memfd. This is a false positive as it isn't a real path in the
// file system. Skip over it.
continue;
}
const QByteArray deletedMarker = QByteArrayLiteral(" (deleted)");
// We filter only .so files to ensure that we don't trip over cache files or the like.
// NB: includes .so* and .py* since we also implicitly support snakes to
// a degree
// As a result we need to explicitly look for the main executable.
if (pathname == exePath + deletedMarker) {
return true;
}
if (pathname != exePath && !isLibraryPath(QFile::decodeName(pathname))) {
continue; // not exe and not a library.
}
// Deleted marker always declares something missing. Even when we perform additional stat checks on it.
if (pathname.endsWith(deletedMarker)) {
return true;
}
switch (check) {
case Check::DeletedMarker: {
// If we get here the file hasn't been marked deleted.
break;
}
case Check::Stat: {
struct stat info {
};
const int ret = stat(pathname.constData(), &info);
if (ret == -1) {
qCWarning(DRKONQI_LOG) << "Couldn't stat file, assuming it was deleted" << pathname << strerror(errno);
return true;
break;
}
if (info.st_ino != inode.toULongLong()) {
qCWarning(DRKONQI_LOG) << "Found mismatching inode on" << pathname << info.st_ino << inode;
return true;
break;
}
// It's very awkward but st_dev seems dodgy at least with btrfs. The dev_t the kernel has is not the one
// stat has and what's more the kernel has one that solid doesn't know about either. That may simply be
// because btrfs makes up fake dev_ts since multiple btrfs subvolumes may be on the same block device.
// Anyway, it's unfortunate but I guess we had best not look at the device.
} break;
}
}
return false;
}
/*
SPDX-License-Identifier: GPL-2.0-or-later
SPDX-FileCopyrightText: 2021 Harald Sitter <sitter@kde.org>
*/
#pragma once
#include <QByteArray>
// Helpers for evaluating the content of a procfs maps file.
namespace LinuxProc
{
// Selects how hasMapsDeletedFiles() decides whether a mapped file is gone.
enum class Check {
DeletedMarker, ///< checks the data without disk IO (purely relies on the " (deleted)" annotation - process must be running!)
Stat, ///< additionally stat()s each candidate path and compares its inode against the one recorded in the maps data
};
// Checks if the /maps content has deleted libraries, or the executable is deleted.
// exePathString is the path of the main executable, maps the raw content of the maps file and check the
// strategy to apply. Returns true as soon as one deleted (or, with Check::Stat, changed/unstattable) file is found.
// Entries that are not absolute paths and paths starting with /memfd are ignored.
bool hasMapsDeletedFiles(const QString &exePathString, const QByteArray &maps, Check check);
// Checks if a given path is a library path (this is also true if it has a "(deleted)" qualifier).
// Paths ending in .so* or .py* count as libraries.
// This is a standalone function to ease testing.
bool isLibraryPath(const QString &path);
}
......@@ -32,6 +32,14 @@ ecm_add_tests(
drkonqi_backtrace_parser
)
# Unit test for the procfs maps parser (LinuxProc::hasMapsDeletedFiles / LinuxProc::isLibraryPath).
ecm_add_tests(
linuxprocmapsparsertest
LINK_LIBRARIES
Qt::Core
Qt::Test
DrKonqiInternal
)
if(NOT APPLE)
if(NOT RUBY_EXECTUABLE)
find_program(RUBY_EXECUTABLE ruby)
......
55655b57b000-55655b585000 r--p 00000000 00:1b 36066866 /usr/bin/kwrite (deleted)
55f469b92000-55f46ab0f000 rw-p 00000000 00:00 0 [heap]
SPDX-License-Identifier: CC0-1.0
SPDX-FileCopyrightText: none
55655b57b000-55655b585000 r--p 00000000 00:1b 36066866 /usr/lib/foo.so (deleted)
55f469b92000-55f46ab0f000 rw-p 00000000 00:00 0 [heap]
7feac05c6000-7feac07f9000 rw-s 00000000 00:01 244999 /memfd:xorg (deleted)
SPDX-License-Identifier: CC0-1.0
SPDX-FileCopyrightText: none
55f467eab000-55f467eb5000 r--p 00000000 00:1b __INODE__ /__FILE_PATH__
55f469b92000-55f46ab0f000 rw-p 00000000 00:00 0 [heap]
7feac05c6000-7feac07f9000 rw-s 00000000 00:01 244999 /memfd:xorg (deleted)
SPDX-License-Identifier: CC0-1.0
SPDX-FileCopyrightText: none
/*
SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
SPDX-FileCopyrightText: 2021 Harald Sitter <sitter@kde.org>
*/
#include <QTest>
#include <limits>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <linuxprocmapsparser.h>
// Tests for the procfs maps parser in linuxprocmapsparser.h.
class LinuxProcMapsParserTest : public QObject
{
    Q_OBJECT

    // Inode of the data/linux-procfs-maps.so template on disk, looked up once in initTestCase().
    qulonglong m_linuxProcfsMapsInInode = 0;

    // Loads the maps template at templatePath and substitutes its placeholders: the file path placeholder is
    // replaced by templatePath itself and __INODE__ by the given inode. This way the resulting maps data points
    // at a file that really exists, which is needed to exercise the stat based check.
    // Returns an empty array when the template cannot be opened.
    QByteArray generateData(const QString &templatePath, qulonglong inode)
    {
        QFile f(templatePath);
        if (!f.open(QFile::ReadOnly)) {
            return {};
        }
        QByteArray data = f.readAll();
        data.replace("/__FILE_PATH__", qUtf8Printable(templatePath));
        data.replace("__INODE__", QByteArray::number(inode));
        return data;
    }

private Q_SLOTS:
    void initTestCase()
    {
        struct stat st {
        };
        QVERIFY(stat(QFile::encodeName(QFINDTESTDATA("data/linux-procfs-maps.so")).constData(), &st) != -1);
        m_linuxProcfsMapsInInode = st.st_ino;
    }

    void testHasDeletedStat()
    {
        // Has definitely missing files because of "(deleted)" suffix. The marker must win even when the stat
        // check is requested, i.e. the deleted library must be reported without it having to exist on disk.
        // The exe is not actually in the fixture to prevent the function from returning early when not finding the exe.
        // Should this need changing in the future this needs routing through generateData().
        QFile f(QFINDTESTDATA("data/linux-procfs-maps-with-missing-files"));
        QVERIFY(f.open(QFile::ReadOnly));
        QVERIFY(LinuxProc::hasMapsDeletedFiles("/usr/bin/kwrite", f.readAll(), LinuxProc::Check::Stat));
    }

    void testNoDeletedStat()
    {
        // fixture also includes a /memfd:... path which we expect to not get reported as deleted. We do need
        // to substitute valid data in to get past the inode verification though.
        const QString templatePath = QFINDTESTDATA("data/linux-procfs-maps.so");
        const QByteArray data = generateData(templatePath, m_linuxProcfsMapsInInode);
        QVERIFY(!LinuxProc::hasMapsDeletedFiles(templatePath, data, LinuxProc::Check::Stat));
    }

    void testBadInode()
    {
        // Same as testNoDeletedStat() but with an inode that doesn't match the file on disk.
        const QString templatePath = QFINDTESTDATA("data/linux-procfs-maps.so");
        const QByteArray data = generateData(templatePath, m_linuxProcfsMapsInInode - 1 /* random mutation */);
        QVERIFY(LinuxProc::hasMapsDeletedFiles(templatePath, data, LinuxProc::Check::Stat));
    }

    void testExecMarker()
    {
        // The main executable itself carries the deleted marker.
        QFile f(QFINDTESTDATA("data/linux-procfs-maps-with-deleted-exe"));
        QVERIFY(f.open(QFile::ReadOnly));
        QVERIFY(LinuxProc::hasMapsDeletedFiles("/usr/bin/kwrite", f.readAll(), LinuxProc::Check::DeletedMarker));
    }

    void testSOMarker()
    {
        // A library (not the executable) carries the deleted marker. This has to use the fixture with the
        // deleted library; the deleted-exe fixture would merely repeat testExecMarker().
        QFile f(QFINDTESTDATA("data/linux-procfs-maps-with-missing-files"));
        QVERIFY(f.open(QFile::ReadOnly));
        QVERIFY(LinuxProc::hasMapsDeletedFiles("/usr/bin/kwrite", f.readAll(), LinuxProc::Check::DeletedMarker));
    }

    void testIsLibraryPath()
    {
        QVERIFY(!LinuxProc::isLibraryPath("/bin/a"));
        QVERIFY(!LinuxProc::isLibraryPath("/bin/a."));
        QVERIFY(LinuxProc::isLibraryPath("/bin/a.so"));
        QVERIFY(LinuxProc::isLibraryPath("/bin/a.so.0.1"));
        // we expect deleted modifiers to match, it keeps the regex simpler
        QVERIFY(LinuxProc::isLibraryPath("/bin/a.so.0.1 (deleted)"));
        // A bit silly but what if .so appears as a dirname
        QVERIFY(!LinuxProc::isLibraryPath("/bin/a.so.0/abc/foo"));
        // Drkonqi also has some python handling for some reason :shrug:
        QVERIFY(LinuxProc::isLibraryPath("/bin/a.py"));
        QVERIFY(LinuxProc::isLibraryPath("/bin/a.pyc"));
        QVERIFY(LinuxProc::isLibraryPath("/bin/a.py (deleted)"));
    }
};
QTEST_GUILESS_MAIN(LinuxProcMapsParserTest)
#include "linuxprocmapsparsertest.moc"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment