Verified Commit f8cec3c7 authored by Michael Reeves's avatar Michael Reeves
Browse files

Fix EOL handling in SourceData::FileData::preprocess

This resolves the striping of the trailing EOLs.

CCBUG:437570
parent 38f7a20a
......@@ -32,10 +32,12 @@ Optimizations: Skip unneeded steps.
#include "CommentParser.h"
#include "diff.h"
#include "LineRef.h"
#include "Logging.h"
#include "Utils.h"
#include <algorithm> // for min
#include <qglobal.h>
#include <vector> // for vector
#include <QProcess>
......@@ -175,9 +177,9 @@ const std::shared_ptr<LineDataVector>& SourceData::getLineDataForDisplay() const
return m_normalData.m_v;
}
LineRef SourceData::getSizeLines() const
LineCount SourceData::getSizeLines() const
{
return m_normalData.lineCount();
return SafeInt32<LineCount>(m_normalData.lineCount());
}
qint64 SourceData::getSizeBytes() const
......@@ -591,14 +593,20 @@ bool SourceData::FileData::preprocess(QTextCodec* pEncoding, bool removeComments
return false;
const QByteArray ba = QByteArray::fromRawData(m_pBuf.get() + skipBytes, (QtSizeType)(mDataSize - skipBytes));
QTextStream ts(ba, QIODevice::ReadOnly); //Don't use text mode we need to see the actual line ending.
QTextStream ts(ba, QIODevice::ReadOnly); // Don't use text mode we need to see the actual line ending.
QTextStream ts2(ba, QIODevice::ReadOnly);
ts.setCodec(pEncoding);
ts.setAutoDetectUnicode(false);
ts2.setCodec(pEncoding);
ts2.setAutoDetectUnicode(false);
QString s = ts2.readAll();
m_bIncompleteConversion = false;
m_unicodeBuf->clear();
//*m_unicodeBuf = ts.readAll();
assert(m_unicodeBuf->length() == 0);
mHasEOLTermination = false;
while(!ts.atEnd())
{
line.clear();
......@@ -608,7 +616,7 @@ bool SourceData::FileData::preprocess(QTextCodec* pEncoding, bool removeComments
return false;
}
ts >> curChar;
curChar = ts.read(1).unicode()[0];
QtSizeType firstNonwhite = 0;
bool foundNonWhite = false;
......@@ -635,7 +643,7 @@ bool SourceData::FileData::preprocess(QTextCodec* pEncoding, bool removeComments
if(ts.atEnd())
break;
ts >> curChar;
curChar = ts.read(1).unicode()[0];
}
++lineCount;
......@@ -658,7 +666,7 @@ bool SourceData::FileData::preprocess(QTextCodec* pEncoding, bool removeComments
if(m_pBuf[lastOffset + j] == '\n')
{
ts >> curChar;
curChar = ts.read(1).unicode()[0];
vOrigDataLineEndStyle.push_back(eLineEndStyleDos);
lastOffset = ts.pos();
break;
......@@ -675,16 +683,38 @@ bool SourceData::FileData::preprocess(QTextCodec* pEncoding, bool removeComments
//kdiff3 internally uses only unix style endings for simplicity.
m_v->push_back(LineData(m_unicodeBuf, lastOffset, line.length(), firstNonwhite, parser->isSkipable(), parser->isPureComment()));
m_unicodeBuf->append(line).append('\n');
//The last line may not have an EOL mark. In that case don't add one to our buffer.
m_unicodeBuf->append(line);
if(curChar == '\n')
{
m_unicodeBuf->append('\n');
}
lastOffset = m_unicodeBuf->length();
}
mHasEOLTermination = m_v->size() >= 2 && ((*m_v)[m_v->size() - 1].getLine() == ("\n")
&& (*m_v)[m_v->size() - 2].getLine() == ("\n"));
/*
Process trailing new line as if there were a blank non-terminated line after it.
But do nothing to the data buffer since this a phantom line needed for internal purposes.
*/
if(curChar == '\n')
{
mHasEOLTermination = true;
++lineCount;
parser->processLine("");
m_v->push_back(LineData(m_unicodeBuf, lastOffset, 0, 0, parser->isSkipable(), parser->isPureComment()));
}
m_v->push_back(LineData(m_unicodeBuf, lastOffset));
//Insure GNU:Diff can access one byte passed the end of the file. This is quick fix workaround.
//TODO: Fix this properly we should not be sending GNU:Diff invalid offsets.
//m_unicodeBuf->append('\u0000');
assert(m_v->size() < 2 || (*m_v)[m_v->size() - 1].getOffset() != (*m_v)[m_v->size() - 2].getOffset());
//Check if we have two identical offsets at the end. Indicates a bug in the read loop.
assert(m_v->size() < 2 || (*m_v)[m_v->size() - 2].getOffset() != (*m_v)[m_v->size() - 3].getOffset());
  • When m_v->size() == 2, the expression (*m_v)[m_v->size() - 3] will evaluate (*m_v)[-1] which is out-of-bounds. Depending on the correct semantics, either use

    assert(m_v->size() <= 2 || (*m_v)[m_v->size() - 2].getOffset() != (*m_v)[m_v->size() - 3].getOffset());

    or

    assert(m_v->size() < 2 || (*m_v)[m_v->size() - 1].getOffset() != (*m_v)[m_v->size() - 2].getOffset());

    like before this commit. Ping @mreeves .

Please register or sign in to reply
//Validate
//assert(m_v->size() < 1 || (quint64)(*m_v)[m_v->size() - 2].getOffset() == lastOffset);
m_bIsText = true;
......@@ -775,7 +805,7 @@ QTextCodec* SourceData::detectEncoding(const char* buf, qint64 size, FileOffset&
QByteArray s;
/*
We don't need the whole file here just the header.
] */
*/
if(size <= 5000)
s = QByteArray(buf, (QtSizeType)size);
else
......
......@@ -27,7 +27,7 @@ class SourceData
public:
void setOptions(const QSharedPointer<Options> &pOptions);
Q_REQUIRED_RESULT LineRef getSizeLines() const;
Q_REQUIRED_RESULT LineCount getSizeLines() const;
Q_REQUIRED_RESULT qint64 getSizeBytes() const;
Q_REQUIRED_RESULT const char* getBuf() const;
Q_REQUIRED_RESULT const QString& getText() const;
......@@ -66,7 +66,7 @@ class SourceData
void setEncoding(QTextCodec* pEncoding);
private:
protected:
bool convertFileEncoding(const QString& fileNameIn, QTextCodec* pCodecIn,
const QString& fileNameOut, QTextCodec* pCodecOut);
......
......@@ -12,6 +12,7 @@
#include <QTextCodec>
#include <QString>
#include <QTest>
#include <qtestcase.h>
class SourceDataMoc: public SourceData
{
......@@ -47,6 +48,7 @@ class DiffTest: public QObject
QVERIFY(!simData.isFromBuffer());
QVERIFY(!simData.isIncompleteConversion());
QVERIFY(simData.getErrors().isEmpty());
QVERIFY(!simData.hasEOLTermiantion());
//write out test file
testFile.open();
testFile.write(u8"//\n //\n \t\n D// \t\n");
......@@ -87,10 +89,52 @@ class DiffTest: public QObject
QVERIFY(!simData.isEmpty());
QVERIFY(simData.hasData());
QVERIFY(simData.getEncoding() != nullptr);
QCOMPARE(simData.getSizeLines(), 4);
QCOMPARE(simData.getSizeLines(), 5);
QCOMPARE(simData.getSizeBytes(), file.size());
}
void eolTest()
{
QTemporaryFile eolTest;
SourceDataMoc simData;
eolTest.open();
eolTest.write("\n}\n");
eolTest.close();
simData.setFilename(eolTest.fileName());
simData.readAndPreprocess(QTextCodec::codecForName("UTF-8"), true);
QVERIFY(simData.getErrors().isEmpty());
QVERIFY(!simData.isFromBuffer());
QVERIFY(!simData.isEmpty());
QVERIFY(simData.hasData());
QVERIFY(simData.getEncoding() != nullptr);
QVERIFY(simData.hasEOLTermiantion());
QCOMPARE(simData.getSizeLines(), 3);
QCOMPARE(simData.getSizeBytes(), FileAccess(eolTest.fileName()).size());
const std::shared_ptr<LineDataVector> &lineData = simData.getLineDataForDisplay();
QVERIFY(lineData != nullptr);
QCOMPARE(lineData->size() - 1, 3);//Phantom line generated for last EOL mark if present
QVERIFY(eolTest.remove());
eolTest.open();
eolTest.write("\n}");
eolTest.close();
simData.setFilename(eolTest.fileName());
simData.readAndPreprocess(QTextCodec::codecForName("UTF-8"), true);
QVERIFY(simData.getErrors().isEmpty());
QVERIFY(!simData.hasEOLTermiantion());
QCOMPARE(simData.getSizeLines(), 2);
QCOMPARE(simData.getSizeBytes(), FileAccess(eolTest.fileName()).size());
}
/*
This validates that LineData records are being setup with the correct data.
Only size and raw content are checked here.
......@@ -104,13 +148,13 @@ class DiffTest: public QObject
QVERIFY(simData.getErrors().isEmpty());
QVERIFY(simData.hasData());
QCOMPARE(simData.getSizeLines(), 4);
QCOMPARE(simData.getSizeLines(), 5);
QCOMPARE(simData.getSizeBytes(), file.size());
const std::shared_ptr<LineDataVector> &lineData = simData.getLineDataForDisplay();
//Verify LineData is being setup correctly.
QVERIFY(lineData != nullptr);
QCOMPARE(lineData->size() - 1, 4);
QCOMPARE(lineData->size() - 1, 5);
QVERIFY((*lineData)[0].getBuffer() != nullptr);
QCOMPARE((*lineData)[0].size(), 2);
......@@ -154,11 +198,17 @@ class DiffTest: public QObject
simData.readAndPreprocess(QTextCodec::codecForName("UTF-8"), true);
QVERIFY(simData.getErrors().isEmpty());
std::shared_ptr<const LineDataVector> lineData = simData.getLineDataForDisplay();
QVERIFY(simData.hasData());
//Verify basic line data structure.
QCOMPARE(lineData->size() - 1, 5);
simData2.readAndPreprocess(QTextCodec::codecForName("UTF-8"), true);
QVERIFY(simData2.getErrors().isEmpty());
QVERIFY(simData2.hasData());
lineData = simData.getLineDataForDisplay();
//Verify basic line data structure.
QCOMPARE(lineData->size() - 1, 5);
ManualDiffHelpList manualDiffList;
manualDiffList.runDiff(simData.getLineDataForDiff(), simData.getSizeLines(), simData2.getLineDataForDiff(), simData2.getSizeLines(), diffList, e_SrcSelector::A, e_SrcSelector::B,
......@@ -167,24 +217,25 @@ class DiffTest: public QObject
Verify DiffList generated by runDiff. Not tested elsewhere.
*/
DiffList::const_iterator diff = diffList.begin();
QVERIFY(diffList.size() == 2);
QVERIFY(diff->numberOfEquals() == 1);
QVERIFY(diff->diff1() == 1);
QVERIFY(diff->diff2() == 1);
QCOMPARE(diffList.size(), 2);
QCOMPARE(diff->numberOfEquals(), 1);
QCOMPARE(diff->diff1(), 1);
QCOMPARE(diff->diff2(), 1);
diff++;
QVERIFY(diff->numberOfEquals() == 2);
QVERIFY(diff->diff1() == 0);
QVERIFY(diff->diff2() == 0);
QCOMPARE(diff->numberOfEquals(), 3);
QCOMPARE(diff->diff1(), 0);
QCOMPARE(diff->diff2(), 0);
Diff3LineList diff3List;
diff3List.calcDiff3LineListUsingAB(&diffList);
QVERIFY(diff3List.size() == 4);
QCOMPARE(diff3List.size(), 5);
/*
{[0] = {lineA = {mLineNumber = 0}, lineB = {mLineNumber = 0}, lineC = {mLineNumber = -1}, bAEqC = false, bBEqC = false, bAEqB = true, bWhiteLineA = false, bWhiteLineB = false, bWhiteLineC = false, pFineAB = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, pFineBC = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, pFineCA = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, mLinesNeededForDisplay = 1, mSumLinesNeededForDisplay = 0},
[1] = {lineA = {mLineNumber = 1}, lineB = {mLineNumber = 1}, lineC = {mLineNumber = -1}, bAEqC = false, bBEqC = false, bAEqB = false, bWhiteLineA = false, bWhiteLineB = false, bWhiteLineC = false, pFineAB = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, pFineBC = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, pFineCA = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, mLinesNeededForDisplay = 1, mSumLinesNeededForDisplay = 0},
[2] = {lineA = {mLineNumber = 2}, lineB = {mLineNumber = 2}, lineC = {mLineNumber = -1}, bAEqC = false, bBEqC = false, bAEqB = true, bWhiteLineA = false, bWhiteLineB = false, bWhiteLineC = false, pFineAB = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, pFineBC = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, pFineCA = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, mLinesNeededForDisplay = 1, mSumLinesNeededForDisplay = 0},
[3] = {lineA = {mLineNumber = 3}, lineB = {mLineNumber = 3}, lineC = {mLineNumber = -1}, bAEqC = false, bBEqC = false, bAEqB = true, bWhiteLineA = false, bWhiteLineB = false, bWhiteLineC = false, pFineAB = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, pFineBC = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, pFineCA = std::shared_ptr<DiffList> (empty) = {get() = 0x0}, mLinesNeededForDisplay = 1, mSumLinesNeededForDisplay = 0}}
[4] = {lineA = (mLineNumber = 4), lineB = (mLineNumber = 4). lineC = (mLineNumber = -1), bAEqC = false, bBEqC = false, bAEqB = true, bWhiteLineA = false, bWhiteLineB = false, bWhiteLineC = false, pFineAB = nullptr, pFineBC = nullptr, pFineCA = nullptr, mLinesNeededForDisplay = 1. mSumLinesNeededForDisplay = 0}
*/
}
......@@ -203,7 +254,7 @@ class DiffTest: public QObject
const std::shared_ptr<LineDataVector> &lineData = simData.getLineDataForDisplay();
//Verify we actually have data.
QVERIFY(lineData != nullptr);
QCOMPARE(lineData->size() - 1, 4);
QCOMPARE(lineData->size() - 1, 5);
QVERIFY(!(*lineData)[0].whiteLine());
QVERIFY((*lineData)[0].isSkipable());
......@@ -220,6 +271,10 @@ class DiffTest: public QObject
QVERIFY(!(*lineData)[3].whiteLine());
QVERIFY(!(*lineData)[3].isPureComment());
QVERIFY(!(*lineData)[3].isSkipable());
QVERIFY((*lineData)[4].whiteLine());
QVERIFY(!(*lineData)[4].isPureComment());
QVERIFY(!(*lineData)[4].isSkipable());
}
};
......
......@@ -612,6 +612,8 @@ void DiffList::runDiff(const std::shared_ptr<LineDataVector> &p1, const size_t i
}
else
{
assert((size_t)size1 < p1->size() && (size_t)size2 < p2->size());
GnuDiff::comparison comparisonInput;
memset(&comparisonInput, 0, sizeof(comparisonInput));
comparisonInput.parent = nullptr;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment