Commit 58740145 authored by Michael Reeves's avatar Michael Reeves

Add c++/c comment parser

parent faa47628
......@@ -24,6 +24,7 @@ include(FeatureSummary)
include(ECMInstallIcons)
include(ECMAddAppIcon)
include(ECMSetupVersion)
include(ECMAddTests)
ecm_setup_version(1.9.70 VARIABLE_PREFIX KDIFF3 VERSION_HEADER ${CMAKE_BINARY_DIR}/src/version.h)
......
......@@ -31,7 +31,8 @@ set(kdiff3part_PART_SRCS
Logging.cpp
FileNameLineEdit.cpp
MergeEditLine.cpp
Options.cpp )
Options.cpp
CommentParser.cpp )
add_library(kdiff3part MODULE ${kdiff3part_PART_SRCS})
......@@ -51,6 +52,9 @@ set(kdiff3_SRCS
${kdiff3part_PART_SRCS}
)
if(BUILD_TESTING)
add_subdirectory( autotests )
endif()
#can't use add_subdirectory because it changes the scope.
include(icons/CMakeLists.txt)
add_executable(kdiff3 ${kdiff3_SRCS})
......
/**
* Copyright (C) 2019 Michael Reeves <reeves.87@gmail.com>
*
* This file is part of KDiff3.
*
* KDiff3 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* KDiff3 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with KDiff3. If not, see <http://www.gnu.org/licenses/>.
*/
#include "CommentParser.h"
#include <QChar>
#include <QRegularExpression>
#include <QSharedPointer>
#include <QString>
void DefaultCommentParser::processChar(const QString &line, const QChar &inChar)
{
if(!bIsEscaped)
{
switch(inChar.unicode())
{
case '\\':
if(bInString)
bIsEscaped = true;
break;
case '\'':
case '"':
if(!inComment())
{
if(!bInString)
{
bInString = true;
mStartChar = inChar;
}
else if(mStartChar == inChar)
{
bInString = false;
}
}
break;
case '/':
if(bInString)
break;
if(!inComment() && mLastChar == '/')
{
mCommentType = singleLine;
mIsPureComment = line.startsWith("//") ? yes : no;
}
else if(mLastChar == '*' && mCommentType == multiLine)
{
//ending multi-line comment
mCommentType = none;
if(!isFirstLine)
mIsPureComment = line.endsWith("*/") ? yes : mIsPureComment;
}
break;
case '*':
if(bInString)
break;
if(mLastChar == '/' && !inComment())
{
mCommentType = multiLine;
mIsPureComment = line.startsWith("/*") ? yes : mIsPureComment;
isFirstLine = true;
}
break;
case '\n':
if(mCommentType == singleLine)
{
mCommentType = none;
}
if(mCommentType == multiLine && !isFirstLine)
{
mIsPureComment = yes;
}
isFirstLine = false;
break;
default:
if(inComment())
{
break;
}
mIsPureComment = no;
break;
}
mLastChar = inChar;
}
else
{
bIsEscaped = false;
mLastChar = QChar();
}
};
void DefaultCommentParser::processLine(const QString &line)
{
for(const QChar &c : line)
{
processChar(line, c);
}
processChar(line, '\n');
}
/**
 * Placeholder implementation: the body is empty, so calling this currently has no effect.
 */
void DefaultCommentParser::removeComments()
{
}
/**
* Copyright (C) 2019 Michael Reeves <reeves.87@gmail.com>
*
* This file is part of KDiff3.
*
* KDiff3 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* KDiff3 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with KDiff3. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COMMENTPARSER_H
#define COMMENTPARSER_H
#include <QChar>
#include <QString>
/**
 * Abstract interface of a parser that is fed source text and reports comment state.
 */
class CommentParser
{
  public:
    /** Processes a single character; line is the complete line the character belongs to. */
    virtual void processChar(const QString &line, const QChar &inChar) = 0;
    /** Processes one complete line of text. */
    virtual void processLine(const QString &line) = 0;
    /** Reports whether the parser is inside a comment after the input processed so far. */
    virtual bool inComment() const = 0;
    /** Reports whether the parser classified the processed line as a pure comment. */
    virtual bool isPureComment() const = 0;
    virtual void removeComments() = 0;

    // Implementations are destroyed through pointers to this interface.
    virtual ~CommentParser() = default;
};
/**
 * CommentParser implementation that recognises "//" and slash-star style comments
 * and keeps track of quoted strings while doing so.
 */
class DefaultCommentParser : public CommentParser
{
  private:
    // Unscoped on purpose: the enumerators are referenced as DefaultCommentParser::none etc.
    enum CommentType {none, singleLine, multiLine};
    enum TriState {no, yes, unknown};

  public:
    void processLine(const QString &line) override;
    /** True while a single-line or multi-line comment is open. */
    bool inComment() const override { return mCommentType != none; }
    /** True only for a definite "yes"; an unknown verdict is reported as false. */
    bool isPureComment() const override { return mIsPureComment == yes; }
    // Defined in the .cpp file, therefore deliberately not declared inline.
    void removeComments() override;

  protected:
    friend class CommentParserTest;

    void processChar(const QString &line, const QChar &inChar) override;
    //For tests only.
    bool isEscaped() const { return bIsEscaped; }
    bool inString() const { return bInString; }

  private:
    // Previously processed character and the quote character that opened the current string.
    QChar mLastChar, mStartChar;
    // Set when a multi-line comment is opened, cleared again at the next line end.
    bool isFirstLine = false;
    TriState mIsPureComment = unknown;
    bool bInString = false;
    bool bIsEscaped = false;
    CommentType mCommentType = none;
    // Character position bookkeeping for the parser.
    quint32 pos = 0;
};
#endif // !COMMENTPARSER_H
......@@ -32,10 +32,12 @@ Optimizations: Skip unneeded steps.
*/
#include "SourceData.h"
#include "CommentParser.h"
#include "Utils.h"
#include "diff.h"
#include "Logging.h"
#include <QScopedPointer>
#include <QProcess>
#include <QString>
#include <QTemporaryFile>
......@@ -571,6 +573,7 @@ bool SourceData::FileData::preprocess(QTextCodec* pEncoding)
LineCount lineCount = 0;
qint64 lastOffset = 0;
qint64 skipBytes = 0;
QScopedPointer<CommentParser> parser(new DefaultCommentParser());
// detect line end style
QVector<e_LineEndStyle> vOrigDataLineEndStyle;
......@@ -652,9 +655,11 @@ bool SourceData::FileData::preprocess(QTextCodec* pEncoding)
vOrigDataLineEndStyle.push_back(eLineEndStyleUndefined);
break;
}
parser->processLine(line);
//kdiff3 internally uses only unix style endings for simplicity.
m_v.push_back(LineData(m_unicodeBuf, lastOffset, line.length()));
m_v.push_back(LineData(m_unicodeBuf, lastOffset, line.length(), parser->isPureComment()));
m_unicodeBuf->append(line).append('\n');
lastOffset = m_unicodeBuf->length();
}
......
# The unit tests need Qt's Test module in addition to what the main build requires.
find_package(
Qt5 ${QT_MIN_VERSION}
REQUIRED
Test
)
# Build the comment parser test from the test source plus the parser implementation
# it exercises, and link it against Qt5::Test.
ecm_add_test(commentparser.cpp ../CommentParser.cpp
TEST_NAME "commentparser"
LINK_LIBRARIES Qt5::Test
)
/**
* Copyright (C) 2019 Michael Reeves <reeves.87@gmail.com>
*
* This file is part of KDiff3.
*
* KDiff3 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* KDiff3 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with KDiff3. If not, see <http://www.gnu.org/licenses/>.
*/
#include <QtTest/QtTest>
#include "../CommentParser.h"
/**
 * Unit tests for DefaultCommentParser.
 *
 * The class is declared a friend of DefaultCommentParser, which is why the tests can
 * call the protected processChar()/isEscaped()/inString() members and write the
 * private mCommentType member directly.
 */
class CommentParserTest : public QObject
{
Q_OBJECT
private slots:
// NOTE: Qt Test treats a private slot named init() specially and invokes it before
// each test function rather than listing it as a test of its own.
void init()
{
DefaultCommentParser parser;
//Sanity check defaults.
QVERIFY(!parser.isPureComment());
QVERIFY(!parser.isEscaped());
QVERIFY(!parser.inString());
QVERIFY(!parser.inComment());
}
// A line consisting only of a "//" comment is a pure comment and leaves no comment open.
void singleLineComment1()
{
DefaultCommentParser test1;
test1.processLine("//ddj ?*8");
QVERIFY(!test1.inComment());
QVERIFY(test1.isPureComment());
}
// Quotes inside a "//" comment must not start a string.
void singleLineComment2()
{
DefaultCommentParser test1;
test1.processLine("//comment with quotes embeded \"\"");
QVERIFY(!test1.inComment());
QVERIFY(test1.isPureComment());
}
// A "//" comment that follows other text does not make the line a pure comment.
void singleLineComment3()
{
DefaultCommentParser test1;
test1.processLine("anythis//endof line comment");
QVERIFY(!test1.inComment());
QVERIFY(!test1.isPureComment());
}
// A multi-line opener inside a "//" comment must not open a multi-line comment.
void singleLineComment4()
{
DefaultCommentParser test1;
test1.processLine("anythis//ignore embeded multiline squence /*");
QVERIFY(!test1.inComment());
QVERIFY(!test1.isPureComment());
}
// inComment() must be true for both comment types and false for none.
void inComment()
{
DefaultCommentParser test;
test.mCommentType = DefaultCommentParser::multiLine;
QVERIFY(test.inComment());
test.mCommentType = DefaultCommentParser::singleLine;
QVERIFY(test.inComment());
test.mCommentType = DefaultCommentParser::none;
QVERIFY(!test.inComment());
}
// Multi-line comments: on one line, starting mid-line, and spanning several lines.
void multiLineComment()
{
DefaultCommentParser test;
//multi-line syntax on one line
test.processLine("/* kjd*/");
QVERIFY(test.isPureComment());
QVERIFY(!test.inComment());
//mid line comment start.
test = DefaultCommentParser();
test.processLine("fskk /* kjd */");
QVERIFY(!test.inComment());
QVERIFY(!test.isPureComment());
//mid line comment start, multiple lines, text after the comment end
test = DefaultCommentParser();
test.processLine("fskk /* kjd ");
QVERIFY(test.inComment());
QVERIFY(!test.isPureComment());
test.processLine(" comment line ");
QVERIFY(test.inComment());
QVERIFY(test.isPureComment());
test.processLine(" comment */ not comment ");
QVERIFY(!test.inComment());
QVERIFY(!test.isPureComment());
//mid line comment start, multiple lines, comment end closes the line
test = DefaultCommentParser();
test.processLine("fskk /* kjd ");
QVERIFY(test.inComment());
QVERIFY(!test.isPureComment());
test.processLine(" comment line ");
QVERIFY(test.inComment());
QVERIFY(test.isPureComment());
//embedded single line character sequence should not end comment
test.processLine(" comment line //");
QVERIFY(test.inComment());
QVERIFY(test.isPureComment());
test.processLine(" comment */");
QVERIFY(!test.inComment());
QVERIFY(test.isPureComment());
//Escape sequences are not relevant to comments
test.processLine("/* comment \\*/");
QVERIFY(!test.inComment());
QVERIFY(test.isPureComment());
//invalid in C++ should not be flagged as pure comment
test.processLine("/* comment */ */");
QVERIFY(!test.inComment());
QVERIFY(!test.isPureComment());
}
// Comment delimiters inside quoted strings are ignored; escaped quotes do not end a string.
void stringTest()
{
DefaultCommentParser test;
test.processLine("\"quoted string // \"");
QVERIFY(!test.inString());
QVERIFY(!test.inComment());
QVERIFY(!test.isPureComment());
test = DefaultCommentParser();
test.processLine("\"quoted string /* \"");
QVERIFY(!test.inString());
QVERIFY(!test.inComment());
QVERIFY(!test.isPureComment());
//test the only escape sequence we care about
test = DefaultCommentParser();
// opening quote
test.processChar("\"", '"');
QVERIFY(!test.isEscaped());
QVERIFY(test.inString());
// backslash inside the string arms the escape
test.processChar("\"", '\\');
QVERIFY(test.isEscaped());
QVERIFY(test.inString());
// escaped quote: consumed without ending the string
test.processChar("\"\\\"", '"');
QVERIFY(!test.isEscaped());
QVERIFY(test.inString());
// unescaped quote: ends the string
test.processChar("\"\\\"\"", '"');
QVERIFY(!test.isEscaped());
QVERIFY(!test.inString());
}
};
QTEST_MAIN(CommentParserTest);
#include "commentparser.moc"
......@@ -97,11 +97,12 @@ class LineData
public:
explicit LineData() = default; // needed for Qt internal reasons should not be used.
inline LineData(const QSharedPointer<QString> &buffer, const qint64 inOffset, QtNumberType inSize = 0)
inline LineData(const QSharedPointer<QString> &buffer, const qint64 inOffset, QtNumberType inSize = 0, bool inIsPureComment=false)
{
mBuffer = buffer;
mOffset = inOffset;
mSize = inSize;
bContainsPureComment = inIsPureComment;
}
Q_REQUIRED_RESULT inline int size() const { return mSize; }
inline void setFirstNonWhiteChar(const qint32 firstNonWhiteChar) { mFirstNonWhiteChar = firstNonWhiteChar;}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment