/*
 * ark -- archiver for the KDE project
 *
 * Copyright (C) 2009 Harald Hvaal <haraldhv@stud.ntnu.no>
 * Copyright (C) 2010-2011,2014 Raphael Kubo da Costa <rakuco@FreeBSD.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 */

#include "cliplugin.h"
24
#include "ark_debug.h"
25
#include "kerfuffle/kerfuffle_export.h"
26

27
#include <QDateTime>
28 29
#include <QRegularExpression>

Bhushan Shah's avatar
Bhushan Shah committed
30
#include <KPluginFactory>
Laurent Montel's avatar
Laurent Montel committed
31

32
using namespace Kerfuffle;
33

34
// Defines the CliPluginFactory plugin factory: its metadata comes from
// kerfuffle_clirar.json and CliPlugin is the class registered with it.
K_PLUGIN_FACTORY_WITH_JSON(CliPluginFactory, "kerfuffle_clirar.json", registerPlugin<CliPlugin>();)
35

36
// Constructs the RAR plugin with the listing parser in its initial state
// (waiting for the unrar title line). Both arguments are handed unchanged
// to the CliInterface constructor.
CliPlugin::CliPlugin(QObject *parent, const QVariantList& args)
        : CliInterface(parent, args)
        , m_parseState(ParseStateTitle)
        , m_isUnrar5(false)
        , m_isPasswordProtected(false)
        , m_isSolid(false)
        // unrar starts its output with an empty line, which is skipped.
        , m_remainingIgnoreLines(1)
        , m_linesComment(0)
{
    // The unrar output cannot be parsed without its empty lines.
    setListEmptyLines(true);

    qCDebug(ARK) << "Loaded cli_rar plugin";
}
50

51 52 53
// Nothing to release explicitly; the members clean up after themselves.
CliPlugin::~CliPlugin() = default;
54

55 56 57 58 59 60 61
void CliPlugin::resetParsing()
{
    m_parseState = ParseStateTitle;
    m_remainingIgnoreLines = 1;
    m_comment.clear();
}

62 63 64 65 66 67 68 69 70 71 72 73
// #272281: the proprietary unrar program rejects directory arguments that
//          end in '/' when only part of an archive is extracted, so a
//          single trailing slash is removed from the name.
QString CliPlugin::escapeFileName(const QString &fileName) const
{
    QString escaped(fileName);

    if (escaped.endsWith(QLatin1Char('/'))) {
        escaped.chop(1);
    }

    return escaped;
}

74 75
// Returns the table describing how the rar/unrar command-line tools are
// invoked and which patterns identify prompts and errors in their output.
// The table is filled in on the first call and reused afterwards.
ParameterList CliPlugin::parameterList() const
{
    static ParameterList p;

    if (!p.isEmpty()) {
        return p;
    }

    p[CaptureProgress] = true;

    // Programs: unrar handles the read-only operations, rar the modifying ones.
    const QStringList unrarProgram{QStringLiteral("unrar")};
    const QStringList rarProgram{QStringLiteral("rar")};
    p[TestProgram] = unrarProgram;
    p[ExtractProgram] = unrarProgram;
    p[ListProgram] = unrarProgram;
    p[AddProgram] = rarProgram;
    p[DeleteProgram] = rarProgram;

    // Command lines.
    p[ListArgs] = QStringList{QStringLiteral("vt"),
                              QStringLiteral("-v"),
                              QStringLiteral("$PasswordSwitch"),
                              QStringLiteral("$Archive")};
    p[ExtractArgs] = QStringList{QStringLiteral("-kb"),
                                 QStringLiteral("-p-"),
                                 QStringLiteral("$PreservePathSwitch"),
                                 QStringLiteral("$PasswordSwitch"),
                                 QStringLiteral("$Archive"),
                                 QStringLiteral("$Files")};
    p[DeleteArgs] = QStringList{QStringLiteral("d"),
                                QStringLiteral("$PasswordSwitch"),
                                QStringLiteral("$Archive"),
                                QStringLiteral("$Files")};
    p[AddArgs] = QStringList{QStringLiteral("a"),
                             QStringLiteral("$Archive"),
                             QStringLiteral("$PasswordSwitch"),
                             QStringLiteral("$CompressionLevelSwitch"),
                             QStringLiteral("$Files")};
    p[CommentArgs] = QStringList{QStringLiteral("c"),
                                 QStringLiteral("$CommentSwitch"),
                                 QStringLiteral("$Archive")};
    p[TestArgs] = QStringList{QStringLiteral("t"),
                              QStringLiteral("$Archive")};

    // Switches substituted into the command lines above.
    p[PreservePathSwitch] = QStringList{QStringLiteral("x"),
                                        QStringLiteral("e")};
    p[PasswordSwitch] = QStringList{QStringLiteral("-p$Password")};
    p[PasswordHeaderSwitch] = QStringList{QStringLiteral("-hp$Password")};
    p[CompressionLevelSwitch] = QStringLiteral("-m$CompressionLevel");
    p[CommentSwitch] = QStringLiteral("-z$CommentFile");

    // Overwrite prompt and the answers that can be sent back.
    p[FileExistsExpression] = QStringList{
        QStringLiteral("^\\[Y\\]es, \\[N\\]o, \\[A\\]ll, n\\[E\\]ver, \\[R\\]ename, \\[Q\\]uit $")};
    p[FileExistsFileName] = QStringList{
        QStringLiteral("^(.+) already exists. Overwrite it"),                  // unrar 3 & 4
        QStringLiteral("^Would you like to replace the existing file (.+)$")}; // unrar 5
    p[FileExistsInput] = QStringList{QStringLiteral("Y"),   // overwrite
                                     QStringLiteral("N"),   // skip
                                     QStringLiteral("A"),   // overwrite all
                                     QStringLiteral("E"),   // autoskip
                                     QStringLiteral("Q")};  // cancel

    // Patterns recognised in the tool output.
    p[PasswordPromptPattern] = QLatin1String("Enter password \\(will not be echoed\\) for");
    p[WrongPasswordPatterns] = QStringList{QStringLiteral("password incorrect"),
                                           QStringLiteral("wrong password")};
    p[ExtractionFailedPatterns] = QStringList{QStringLiteral("CRC failed"),
                                              QStringLiteral("Cannot find volume")};
    p[CorruptArchivePatterns] = QStringList{QStringLiteral("Unexpected end of archive"),
                                            QStringLiteral("the file header is corrupt")};
    p[DiskFullPatterns] = QStringList{QStringLiteral("No space left on device")};
    p[TestPassedPattern] = QStringLiteral("^All OK$");

    return p;
}
134

135
bool CliPlugin::readListLine(const QString &line)
136
{
137 138 139 140 141
    // Ignore number of lines corresponding to m_remainingIgnoreLines.
    if (m_remainingIgnoreLines > 0) {
        --m_remainingIgnoreLines;
        return true;
    }
Christoph Feck's avatar
Christoph Feck committed
142

143 144
    // Parse the title line, which contains the version of unrar.
    if (m_parseState == ParseStateTitle) {
Christoph Feck's avatar
Christoph Feck committed
145

146 147
        QRegularExpression rxVersionLine(QStringLiteral("^UNRAR (\\d+\\.\\d+)( beta \\d)? .*$"));
        QRegularExpressionMatch matchVersion = rxVersionLine.match(line);
Christoph Feck's avatar
Christoph Feck committed
148

149 150 151
        if (matchVersion.hasMatch()) {
            m_parseState = ParseStateComment;
            QString unrarVersion = matchVersion.captured(1);
152
            qCDebug(ARK) << "UNRAR version" << unrarVersion << "detected";
153 154
            if (unrarVersion.toFloat() >= 5) {
                m_isUnrar5 = true;
155
                qCDebug(ARK) << "Using UNRAR 5 parser";
Christoph Feck's avatar
Christoph Feck committed
156
            } else {
157
                qCDebug(ARK) << "Using UNRAR 4 parser";
Christoph Feck's avatar
Christoph Feck committed
158
            }
159 160 161
        }  else {
            // If the second line doesn't contain an UNRAR title, something
            // is wrong.
162
            qCCritical(ARK) << "Failed to detect UNRAR output.";
163 164 165 166 167 168 169 170 171 172
            return false;
        }

    // Or see what version of unrar we are dealing with and call specific
    // handler functions.
    } else if (m_isUnrar5) {
        handleUnrar5Line(line);
    } else {
        handleUnrar4Line(line);
    }
Christoph Feck's avatar
Christoph Feck committed
173

174 175
    return true;
}
Christoph Feck's avatar
Christoph Feck committed
176

177
void CliPlugin::handleUnrar5Line(const QString &line) {
Christoph Feck's avatar
Christoph Feck committed
178

179 180
    // Parses the comment field.
    if (m_parseState == ParseStateComment) {
Christoph Feck's avatar
Christoph Feck committed
181

182
        // RegExp matching end of comment field.
183 184
        // FIXME: Comment itself could also contain the Archive path string here.
        QRegularExpression rxCommentEnd(QStringLiteral("^Archive: .+$"));
Christoph Feck's avatar
Christoph Feck committed
185

186 187 188 189 190
        if (rxCommentEnd.match(line).hasMatch()) {
            m_parseState = ParseStateHeader;
            m_comment = m_comment.trimmed();
            m_linesComment = m_comment.count(QLatin1Char('\n')) + 1;
            if (!m_comment.isEmpty()) {
191
                qCDebug(ARK) << "Found a comment with" << m_linesComment << "lines";
192 193 194 195
            }

        } else {
            m_comment.append(line + QLatin1Char('\n'));
Christoph Feck's avatar
Christoph Feck committed
196 197
        }

198 199 200 201 202 203
        return;
    }

    // Parses the header, which is whatever is between the comment field
    // and the entries.
    else if (m_parseState == ParseStateHeader) {
Christoph Feck's avatar
Christoph Feck committed
204

205
        // "Details: " indicates end of header.
206
        if (line.startsWith(QStringLiteral("Details: "))) {
207
            ignoreLines(1, ParseStateEntryDetails);
208 209 210 211 212 213
            if (line.contains(QLatin1String("volume"))) {
                m_numberOfVolumes++;
                if (!m_isMultiVolume) {
                    m_isMultiVolume = true;
                    qCDebug(ARK) << "Multi-volume archive detected";
                }
214 215 216
            }
            if (line.contains(QLatin1String("solid")) && !m_isSolid) {
                m_isSolid = true;
217
                qCDebug(ARK) << "Solid archive detected";
218
            }
Christoph Feck's avatar
Christoph Feck committed
219
        }
220 221 222 223 224 225
        return;
    }

    // Parses the entry details for each entry.
    else if (m_parseState == ParseStateEntryDetails) {

226 227 228 229 230 231
        // For multi-volume archives there is a header between the entries in
        // each volume.
        if (line.startsWith(QLatin1String("Archive: "))) {
            m_parseState = ParseStateHeader;
            return;

232
        // Empty line indicates end of entry.
233
        } else if (line.trimmed().isEmpty() && !m_unrar5Details.isEmpty()) {
234 235 236
            handleUnrar5Entry();

        } else {
Christoph Feck's avatar
Christoph Feck committed
237

238 239
            // All detail lines should contain a colon.
            if (!line.contains(QLatin1Char(':'))) {
240
                qCWarning(ARK) << "Unrecognized line:" << line;
241 242 243 244 245 246 247
                return;
            }

            // The details are on separate lines, so we store them in the QHash
            // m_unrar5Details.
            m_unrar5Details.insert(line.section(QLatin1Char(':'), 0, 0).trimmed().toLower(),
                                   line.section(QLatin1Char(':'), 1).trimmed());
Christoph Feck's avatar
Christoph Feck committed
248 249
        }

250 251 252 253 254
        return;
    }
}

void CliPlugin::handleUnrar5Entry() {
Christoph Feck's avatar
Christoph Feck committed
255

256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
    ArchiveEntry e;

    QString compressionRatio = m_unrar5Details.value(QStringLiteral("ratio"));
    compressionRatio.chop(1); // Remove the '%'
    e[Ratio] = compressionRatio;

    QString time = m_unrar5Details.value(QStringLiteral("mtime"));
    QDateTime ts = QDateTime::fromString(time, QStringLiteral("yyyy-MM-dd HH:mm:ss,zzz"));
    e[Timestamp] = ts;

    bool isDirectory = (m_unrar5Details.value(QStringLiteral("type")) == QLatin1String("Directory"));
    e[IsDirectory] = isDirectory;

    if (isDirectory && !m_unrar5Details.value(QStringLiteral("name")).endsWith(QLatin1Char('/'))) {
        m_unrar5Details[QStringLiteral("name")] += QLatin1Char('/');
Christoph Feck's avatar
Christoph Feck committed
271 272
    }

273 274 275 276 277 278 279 280 281 282
    QString compression = m_unrar5Details.value(QStringLiteral("compression"));
    int optionPos = compression.indexOf(QLatin1Char('-'));
    if (optionPos != -1) {
        e[Method] = compression.mid(optionPos);
        e[Version] = compression.left(optionPos).trimmed();
    } else {
        // No method specified.
        e[Method].clear();
        e[Version] = compression;
    }
283

284 285
    m_isPasswordProtected = m_unrar5Details.value(QStringLiteral("flags")).contains(QStringLiteral("encrypted"));
    e[IsPasswordProtected] = m_isPasswordProtected;
286

287 288 289 290 291 292
    e[FileName] = m_unrar5Details.value(QStringLiteral("name"));
    e[InternalID] = m_unrar5Details.value(QStringLiteral("name"));
    e[Size] = m_unrar5Details.value(QStringLiteral("size"));
    e[CompressedSize] = m_unrar5Details.value(QStringLiteral("packed size"));
    e[Permissions] = m_unrar5Details.value(QStringLiteral("attributes"));
    e[CRC] = m_unrar5Details.value(QStringLiteral("crc32"));
293

294 295 296 297 298 299 300
    if (e[Permissions].toString().startsWith(QLatin1Char('l'))) {
        e[Link] = m_unrar5Details.value(QStringLiteral("target"));
    }

    m_unrar5Details.clear();
    emit entry(e);
}
301

302
void CliPlugin::handleUnrar4Line(const QString &line) {
303

304 305 306 307
    // Parses the comment field.
    if (m_parseState == ParseStateComment) {

        // RegExp matching end of comment field.
308 309
        // FIXME: Comment itself could also contain the Archive path string here.
        QRegularExpression rxCommentEnd(QStringLiteral("^(Solid archive|Archive|Volume) .+$"));
310 311

        if (rxCommentEnd.match(line).hasMatch()) {
312

313
            if (line.startsWith(QLatin1String("Volume"))) {
314 315 316 317 318
                m_numberOfVolumes++;
                if (!m_isMultiVolume) {
                    m_isMultiVolume = true;
                    qCDebug(ARK) << "Multi-volume archive detected";
                }
319 320 321
            }
            if (line.startsWith(QLatin1String("Solid archive")) && !m_isSolid) {
                m_isSolid = true;
322
                qCDebug(ARK) << "Solid archive detected";
323 324
            }

325
            m_parseState = ParseStateHeader;
326 327 328
            m_comment = m_comment.trimmed();
            m_linesComment = m_comment.count(QLatin1Char('\n')) + 1;
            if (!m_comment.isEmpty()) {
329
                qCDebug(ARK) << "Found a comment with" << m_linesComment << "lines";
330
            }
331

332 333
        } else {
            m_comment.append(line + QLatin1Char('\n'));
334
        }
335

336 337
        return;
    }
338

339 340 341
    // Parses the header, which is whatever is between the comment field
    // and the entries.
    else if (m_parseState == ParseStateHeader) {
342

343 344 345 346 347 348
        // Horizontal line indicates end of header.
        if (line.startsWith(QStringLiteral("--------------------"))) {
            m_parseState = ParseStateEntryFileName;
        }
        return;
    }
349

350 351
    // Parses the entry name, which is on the first line of each entry.
    else if (m_parseState == ParseStateEntryFileName) {
352

353 354 355
        // Ignore empty lines.
        if (line.trimmed().isEmpty()) {
            return;
356
        }
357

358 359 360 361 362
        // Three types of subHeaders can be displayed for unrar 3 and 4.
        // STM has 4 lines, RR has 3, and CMT has lines corresponding to
        // length of comment field +3. We ignore the subheaders.
        QRegularExpression rxSubHeader(QStringLiteral("^Data header type: (CMT|STM|RR)$"));
        QRegularExpressionMatch matchSubHeader = rxSubHeader.match(line);
363
        if (matchSubHeader.hasMatch()) {
364
            qCDebug(ARK) << "SubHeader of type" << matchSubHeader.captured(1) << "found";
365 366 367 368 369 370 371 372 373
            if (matchSubHeader.captured(1) == QLatin1String("STM")) {
                ignoreLines(4, ParseStateEntryFileName);
            } else if (matchSubHeader.captured(1) == QLatin1String("CMT")) {
                ignoreLines(m_linesComment + 3, ParseStateEntryFileName);
            } else if (matchSubHeader.captured(1) == QLatin1String("RR")) {
                ignoreLines(3, ParseStateEntryFileName);
            }
            return;
        }
374

375 376 377
        // The entries list ends with a horizontal line, followed by a
        // single summary line or, for multi-volume archives, another header.
        if (line.startsWith(QStringLiteral("-----------------"))) {
378
            m_parseState = ParseStateHeader;
379
            return;
380

381 382 383 384
        // Encrypted files are marked with an asterisk.
        } else if (line.startsWith(QLatin1Char('*'))) {
            m_isPasswordProtected = true;
            m_unrar4Details.append(QString(line.trimmed()).remove(0, 1)); //Remove the asterisk
385

386 387 388
        // Entry names always start at the second position, so a line not
        // starting with a space is not an entry name.
        } else if (!line.startsWith(QLatin1Char(' '))) {
389
            qCWarning(ARK) << "Unrecognized line:" << line;
390
            return;
391

392 393 394 395
        // If we reach this, then we can assume the line is an entry name, so
        // save it, and move on to the rest of the entry details.
        } else {
            m_unrar4Details.append(line.trimmed());
Raphael Kubo da Costa's avatar
Raphael Kubo da Costa committed
396
        }
397

398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
        m_parseState = ParseStateEntryDetails;

        return;
    }

    // Parses the remainder of the entry details for each entry.
    else if (m_parseState == ParseStateEntryDetails) {

        // If the line following an entry name is empty, we did something
        // wrong.
        Q_ASSERT(!line.trimmed().isEmpty());

        // If we reach a horizontal line, then the previous line was not an
        // entry name, so go back to header.
        if (line.startsWith(QStringLiteral("-----------------"))) {
            m_parseState = ParseStateHeader;
            return;
415 416
        }

417 418 419 420 421 422 423 424 425 426 427 428
        // In unrar 3 and 4 the details are on a single line, so we
        // pass a QStringList containing the details. We need to store
        // it due to symlinks (see below).
        m_unrar4Details.append(line.split(QLatin1Char(' '),
                                          QString::SkipEmptyParts));

        // The details line contains 9 fields, so m_unrar4Details
        // should now contain 9 + the filename = 10 strings. If not, this is
        // not an archive entry.
        if (m_unrar4Details.size() != 10) {
            m_parseState = ParseStateHeader;
            return;
429 430
        }

431 432 433 434 435 436
        // When unrar 3 and 4 list a symlink, they output an extra line
        // containing the link target. The extra line is output after
        // the line we ignore, so we first need to ignore one line.
        if (m_unrar4Details.at(6).startsWith(QLatin1Char('l'))) {
            ignoreLines(1, ParseStateLinkTarget);
            return;
437
        } else {
438
            handleUnrar4Entry();
439 440
        }

441 442 443 444 445 446 447
        // Unrar 3 & 4 show a third line for each entry, which contains
        // three details: Host OS, Solid, and Old. We can ignore this
        // line.
        ignoreLines(1, ParseStateEntryFileName);

        return;
    }
448

449 450
    // Parses a symlink target.
    else if (m_parseState == ParseStateLinkTarget) {
451

452 453 454 455 456
        m_unrar4Details.append(QString(line).remove(QStringLiteral("-->")).trimmed());
        handleUnrar4Entry();

        m_parseState = ParseStateEntryFileName;
        return;
457
    }
458
}
459

460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
void CliPlugin::handleUnrar4Entry() {

    ArchiveEntry e;

    QDateTime ts = QDateTime::fromString(QString(m_unrar4Details.at(4) + QLatin1Char(' ') + m_unrar4Details.at(5)),
                                         QStringLiteral("dd-MM-yy hh:mm"));
    // Unrar 3 & 4 output dates with a 2-digit year but QDateTime takes it as
    // 19??. Let's take 1950 as cut-off; similar to KDateTime.
    if (ts.date().year() < 1950) {
        ts = ts.addYears(100);
    }
    e[Timestamp] = ts;

    bool isDirectory = ((m_unrar4Details.at(6).at(0) == QLatin1Char('d')) ||
                        (m_unrar4Details.at(6).at(1) == QLatin1Char('D')));
    e[IsDirectory] = isDirectory;

    if (isDirectory && !m_unrar4Details.at(0).endsWith(QLatin1Char('/'))) {
        m_unrar4Details[0] += QLatin1Char('/');
    }

    // Unrar reports the ratio as ((compressed size * 100) / size);
    // we consider ratio as (100 * ((size - compressed size) / size)).
    // If the archive is a multivolume archive, a string indicating
    // whether the archive's position in the volume is displayed
    // instead of the compression ratio.
    QString compressionRatio = m_unrar4Details.at(3);
    if ((compressionRatio == QStringLiteral("<--")) ||
        (compressionRatio == QStringLiteral("<->")) ||
        (compressionRatio == QStringLiteral("-->"))) {
        compressionRatio = QLatin1Char('0');
    } else {
        compressionRatio.chop(1); // Remove the '%'
    }
    e[Ratio] = compressionRatio;

    // TODO:
    // - Permissions differ depending on the system the entry was added
    //   to the archive.
    e[FileName] = m_unrar4Details.at(0);
    e[InternalID] = m_unrar4Details.at(0);
    e[Size] = m_unrar4Details.at(1);
    e[CompressedSize] = m_unrar4Details.at(2);
    e[Permissions] = m_unrar4Details.at(6);
    e[CRC] = m_unrar4Details.at(7);
    e[Method] = m_unrar4Details.at(8);
    e[Version] = m_unrar4Details.at(9);
    e[IsPasswordProtected] = m_isPasswordProtected;

    if (e[Permissions].toString().startsWith(QLatin1Char('l'))) {
        e[Link] = m_unrar4Details.at(10);
    }

    m_unrar4Details.clear();
    emit entry(e);
}

// Schedules the next `lines` lines passed to readListLine() to be skipped
// and selects the parse state that applies once parsing continues.
void CliPlugin::ignoreLines(int lines, ParseState nextState) {
    m_parseState = nextState;
    m_remainingIgnoreLines = lines;
}
Laurent Montel's avatar
Laurent Montel committed
521

522
#include "cliplugin.moc"