gnudiff_diff.h 11.6 KB
Newer Older
1 2
/* Shared definitions for GNU DIFF

   Modified for KDiff3 by Joachim Eibl <joachim.eibl at gmx.de> 2003, 2004, 2005.
   The original file was part of GNU DIFF.

   Copyright (C) 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1998, 2001,
   2002 Free Software Foundation, Inc.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
23

Joachim Eibl's avatar
Joachim Eibl committed
24 25 26
#ifndef GNUDIFF_DIFF_H
#define GNUDIFF_DIFF_H

27 28 29 30 31 32 33 34 35 36

#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>

#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
#include <type_traits>
37 38

#include <stdio.h>
39 40

#include <QtGlobal>
Joachim Eibl's avatar
Joachim Eibl committed
41
#include <QString>
42

43 44 45 46 47 48 49 50 51
/* LineRef is the integer type used for line numbers and line counts;
   LINEREF_MAX is the largest value it can represent.  */
typedef signed int LineRef;
#define LINEREF_MAX INT_MAX

/* Refuse to build if LineRef is ever changed to an unsigned type.  */
static_assert( std::is_signed<LineRef>::value,
               "LineRef must be signed." );
//verify(lin_is_wide_enough, sizeof(int) <= sizeof(LineRef));



52 53 54 55 56
/* Return true when C is one of the characters treated as ending a line:
   line feed, carriage return or vertical tab (0x0b).  */
inline bool isEndOfLine( QChar c )
{
   if( c=='\n' )
      return true;
   if( c=='\r' )
      return true;
   return c=='\x0b';
}

57 58
/* Tab width in columns.  NOTE(review): presumably the distance between tab
   stops used when tabs are expanded; no use is visible in this header --
   confirm against the callers.  */
#define TAB_WIDTH 8

Joachim Eibl's avatar
Joachim Eibl committed
59
class GnuDiff
60
{
Joachim Eibl's avatar
Joachim Eibl committed
61
public:
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
/* What kind of changes a hunk contains.  */
enum changes
{
  /* No changes: lines common to both files.  */
  UNCHANGED,

  /* Deletes only: lines taken from just the first file.  */
  OLD,

  /* Inserts only: lines taken from just the second file.  */
  NEW,

  /* Both deletes and inserts: a hunk containing both old and new lines.  */
  CHANGED
};

/* Variables for command line options */

/* Nonzero if output cannot be generated for identical files.  */
Joachim Eibl's avatar
Joachim Eibl committed
81
bool no_diff_means_no_output;
82 83 84

/* Number of lines of context to show in each set of diffs.
   This is zero when context is not to be shown.  */
85
LineRef context;
86 87 88

/* Consider all files as text files (-a).
   Don't interpret codes over 0177 as implying a "binary file".  */
Joachim Eibl's avatar
Joachim Eibl committed
89
bool text;
90 91

/* The significance of white space during comparisons.  */
Joachim Eibl's avatar
Joachim Eibl committed
92
enum
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
{
  /* All white space is significant (the default).  */
  IGNORE_NO_WHITE_SPACE,

  /* Ignore changes due to tab expansion (-E).  */
  IGNORE_TAB_EXPANSION,

  /* Ignore changes in horizontal white space (-b).  */
  IGNORE_SPACE_CHANGE,

  /* Ignore all horizontal white space (-w).  */
  IGNORE_ALL_SPACE
} ignore_white_space;

/* Ignore changes that affect only blank lines (-B).  */
Joachim Eibl's avatar
Joachim Eibl committed
108
bool ignore_blank_lines;
109 110

/* Ignore changes that affect only numbers. (J. Eibl)  */
Joachim Eibl's avatar
Joachim Eibl committed
111 112
bool bIgnoreNumbers;
bool bIgnoreWhiteSpace;
113 114 115

/* Files can be compared byte-by-byte, as if they were binary.
   This depends on various options.  */
Joachim Eibl's avatar
Joachim Eibl committed
116
bool files_can_be_treated_as_binary;
117 118

/* Ignore differences in case of letters (-i).  */
Joachim Eibl's avatar
Joachim Eibl committed
119
bool ignore_case;
120 121

/* Ignore differences in case of letters in file names.  */
Joachim Eibl's avatar
Joachim Eibl committed
122
bool ignore_file_name_case;
123 124

/* Regexp to identify function-header lines (-F).  */
Joachim Eibl's avatar
Joachim Eibl committed
125
//struct re_pattern_buffer function_regexp;
126 127

/* Ignore changes that affect only lines matching this regexp (-I).  */
Joachim Eibl's avatar
Joachim Eibl committed
128
//struct re_pattern_buffer ignore_regexp;
129 130

/* Say only whether files differ, not how (-q).  */
Joachim Eibl's avatar
Joachim Eibl committed
131
bool brief;
132 133 134

/* Expand tabs in the output so the text lines up properly
   despite the characters added to the front of each line (-t).  */
Joachim Eibl's avatar
Joachim Eibl committed
135
bool expand_tabs;
136 137 138 139

/* Use a tab in the output, rather than a space, before the text of an
   input line, so as to keep the proper alignment in the input line
   without changing the characters in it (-T).  */
Joachim Eibl's avatar
Joachim Eibl committed
140
bool initial_tab;
141 142 143 144

/* In directory comparison, specify file to start with (-S).
   This is used for resuming an aborted comparison.
   All file names less than this name are ignored.  */
Joachim Eibl's avatar
Joachim Eibl committed
145
const QChar *starting_file;
146 147

/* Pipe each file's output through pr (-l).  */
Joachim Eibl's avatar
Joachim Eibl committed
148
bool paginate;
149 150

/* Line group formats for unchanged, old, new, and changed groups.  */
Joachim Eibl's avatar
Joachim Eibl committed
151
const QChar *group_format[CHANGED + 1];
152 153

/* Line formats for unchanged, old, and new lines.  */
Joachim Eibl's avatar
Joachim Eibl committed
154
const QChar *line_format[NEW + 1];
155 156

/* If using OUTPUT_SDIFF print extra information to help the sdiff filter.  */
Joachim Eibl's avatar
Joachim Eibl committed
157
bool sdiff_merge_assist;
158 159

/* Tell OUTPUT_SDIFF to show only the left version of common lines.  */
Joachim Eibl's avatar
Joachim Eibl committed
160
bool left_column;
161 162

/* Tell OUTPUT_SDIFF to not show common lines.  */
Joachim Eibl's avatar
Joachim Eibl committed
163
bool suppress_common_lines;
164 165

/* The half line width and column 2 offset for OUTPUT_SDIFF.  */
Joachim Eibl's avatar
Joachim Eibl committed
166 167
unsigned int sdiff_half_width;
unsigned int sdiff_column2_offset;
168 169 170

/* Use heuristics for better speed with large files with a small
   density of changes.  */
Joachim Eibl's avatar
Joachim Eibl committed
171
bool speed_large_files;
172 173

/* Patterns that match file names to be excluded.  */
Joachim Eibl's avatar
Joachim Eibl committed
174
struct exclude *excluded;
175 176 177

/* Don't discard lines.  This makes things slower (sometimes much
   slower) but will find a guaranteed minimal set of changes.  */
Joachim Eibl's avatar
Joachim Eibl committed
178
bool minimal;
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194


/* The result of comparison is an "edit script": a chain of `struct change'.
   Each `struct change' represents one place where some lines are deleted
   and some are inserted.

   LINE0 and LINE1 are the first affected lines in the two files (origin 0).
   DELETED is the number of lines deleted here from file 0.
   INSERTED is the number of lines inserted here in file 1.

   If DELETED is 0 then LINE0 is the number of the line before
   which the insertion was done; vice versa for INSERTED and LINE1.  */

struct change
{
  struct change *link;		/* Previous or next edit command  */
195 196 197 198
  LineRef inserted;			/* # lines of file 1 changed here.  */
  LineRef deleted;			/* # lines of file 0 changed here.  */
  LineRef line0;			/* Line number of 1st deleted line.  */
  LineRef line1;			/* Line number of 1st inserted line.  */
199 200 201 202 203 204 205 206 207
  bool ignore;			/* Flag used in context.c.  */
};

/* Structures that describe the input files.  */

/* Data on one input file being compared.  */

struct file_data {
    /* Buffer in which text of file is read.  */
Joachim Eibl's avatar
Joachim Eibl committed
208
    const QChar* buffer;
209

Joachim Eibl's avatar
Joachim Eibl committed
210
    /* Allocated size of buffer, in QChars.  Always a multiple of
211 212 213 214 215 216 217
       sizeof *buffer.  */
    size_t bufsize;

    /* Number of valid bytes now in the buffer.  */
    size_t buffered;

    /* Array of pointers to lines in the file.  */
Joachim Eibl's avatar
Joachim Eibl committed
218
    const QChar **linbuf;
219 220 221 222 223

    /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
       linebuf[linbuf_base ... buffered_lines - 1] are possibly differing.
       linebuf[linbuf_base ... valid_lines - 1] contain valid data.
       linebuf[linbuf_base ... alloc_lines - 1] are allocated.  */
224
    LineRef linbuf_base, buffered_lines, valid_lines, alloc_lines;
225 226

    /* Pointer to end of prefix of this file to ignore when hashing.  */
Joachim Eibl's avatar
Joachim Eibl committed
227
    const QChar *prefix_end;
228 229 230

    /* Count of lines in the prefix.
       There are this many lines in the file before linbuf[0].  */
231
    LineRef prefix_lines;
232 233

    /* Pointer to start of suffix of this file to ignore when hashing.  */
Joachim Eibl's avatar
Joachim Eibl committed
234
    const QChar *suffix_begin;
235 236 237 238

    /* Vector, indexed by line number, containing an equivalence code for
       each line.  It is this vector that is actually compared with that
       of another file to generate differences.  */
239
    LineRef *equivs;
240 241 242

    /* Vector, like the previous one except that
       the elements for discarded lines have been squeezed out.  */
243
    LineRef *undiscarded;
244 245 246

    /* Vector mapping virtual line numbers (not counting discarded lines)
       to real ones (counting those lines).  Both are origin-0.  */
247
    LineRef *realindexes;
248 249

    /* Total number of nondiscarded lines.  */
250
    LineRef nondiscarded_lines;
251 252 253 254 255 256 257 258 259 260 261

    /* Vector, indexed by real origin-0 line number,
       containing TRUE for a line that is an insertion or a deletion.
       The results of comparison are stored here.  */
    bool *changed;

    /* 1 if at end of file.  */
    bool eof;

    /* 1 more than the maximum equivalence value used for this or its
       sibling file.  */
262
    LineRef equiv_max;
263 264 265 266 267 268 269 270 271 272 273 274
};

/* Data on two input files being compared.  */

struct comparison
  {
    struct file_data file[2];
    struct comparison const *parent;  /* parent, if a recursive comparison */
  };

/* Describe the two files currently being compared.  */

Joachim Eibl's avatar
Joachim Eibl committed
275
struct file_data files[2];
276 277 278

/* Stdio stream to output diffs to.  */

Joachim Eibl's avatar
Joachim Eibl committed
279
FILE *outfile;
280 281 282 283 284 285 286 287 288 289 290

/* Declare various functions.  */

/* analyze.c */
struct change* diff_2_files (struct comparison *);

/* context.c */
void print_context_header (struct file_data[], bool);
void print_context_script (struct change *, bool);

/* dir.c */
Joachim Eibl's avatar
Joachim Eibl committed
291
int diff_dirs (struct comparison const *, int (*) (struct comparison const *, const QChar *, const QChar *));
292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313

/* ed.c */
void print_ed_script (struct change *);
void pr_forward_ed_script (struct change *);

/* ifdef.c */
void print_ifdef_script (struct change *);

/* io.c */
void file_block_read (struct file_data *, size_t);
bool read_files (struct file_data[], bool);

/* normal.c */
void print_normal_script (struct change *);

/* rcs.c */
void print_rcs_script (struct change *);

/* side.c */
void print_sdiff_script (struct change *);

/* util.c */
Joachim Eibl's avatar
Joachim Eibl committed
314
QChar *concat (const QChar *, const QChar *, const QChar *);
Joachim Eibl's avatar
Joachim Eibl committed
315
bool lines_differ ( const QChar *, size_t, const QChar *, size_t );
316
LineRef translate_line_number (struct file_data const *, LineRef);
317 318 319
struct change *find_change (struct change *);
struct change *find_reverse_change (struct change *);
void *zalloc (size_t);
320
enum changes analyze_hunk (struct change *, LineRef *, LineRef *, LineRef *, LineRef *);
321
void begin_output ();
322
void debug_script (struct change *);
323
void finish_output ();
Joachim Eibl's avatar
Joachim Eibl committed
324 325 326 327 328
void message (const QChar *, const QChar *, const QChar *);
void message5 (const QChar *, const QChar *, const QChar *, const QChar *, const QChar *);
void output_1_line (const QChar *, const QChar *, const QChar *, const QChar *);
void perror_with_name (const QChar *);
void setup_output (const QChar *, const QChar *, bool);
329
void translate_range (struct file_data const *, LineRef, LineRef, long *, long *);
330 331

/* version.c */
Joachim Eibl's avatar
Joachim Eibl committed
332
//extern const QChar version_string[];
Joachim Eibl's avatar
Joachim Eibl committed
333 334 335

private:
   // gnudiff_analyze.cpp
336 337
   LineRef diag (LineRef xoff, LineRef xlim, LineRef yoff, LineRef ylim, bool find_minimal, struct partition *part);
   void compareseq (LineRef xoff, LineRef xlim, LineRef yoff, LineRef ylim, bool find_minimal);
Joachim Eibl's avatar
Joachim Eibl committed
338 339
   void discard_confusing_lines (struct file_data filevec[]);
   void shift_boundaries (struct file_data filevec[]);
340
   struct change * add_change (LineRef line0, LineRef line1, LineRef deleted, LineRef inserted, struct change *old);
Joachim Eibl's avatar
Joachim Eibl committed
341 342
   struct change * build_reverse_script (struct file_data const filevec[]);
   struct change* build_script (struct file_data const filevec[]);
Joachim Eibl's avatar
Joachim Eibl committed
343

Joachim Eibl's avatar
Joachim Eibl committed
344 345 346 347 348 349 350
   // gnudiff_io.cpp
   void find_and_hash_each_line (struct file_data *current);
   void find_identical_ends (struct file_data filevec[]);

   // gnudiff_xmalloc.cpp
   void *xmalloc (size_t n);
   void *xrealloc(void *p, size_t n);
351
   void xalloc_die ();
Joachim Eibl's avatar
Joachim Eibl committed
352

Joachim Eibl's avatar
Joachim Eibl committed
353
   inline bool isWhite( QChar c )
Joachim Eibl's avatar
Joachim Eibl committed
354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
   {
      return c==' ' || c=='\t' ||  c=='\r';
   }
}; // class GnuDiff

/* Allocate room for N_ITEMS objects of type TYPE with xmalloc and yield the
   result as a TYPE pointer.  */
#define XMALLOC(Type, N_items) \
  ((Type *) xmalloc ((N_items) * sizeof (Type)))

/* Resize the block at PTR with xrealloc so that it has room for N_ITEMS
   objects of type TYPE, and yield the result as a TYPE pointer.  */
#define XREALLOC(Ptr, Type, N_items) \
  ((Type *) xrealloc ((void *) (Ptr), (N_items) * sizeof (Type)))

/* Declare a TYPE pointer named VAR and point it at storage for one TYPE
   obtained through XMALLOC.  */
#define NEW(Type, Var) \
  Type *(Var) = XMALLOC (Type, 1)

/* Free VAR.  free() does nothing when handed a null pointer, so no explicit
   null test is needed, and VAR is evaluated exactly once.  */
# define XFREE(Var) free (Var)

/* Return a pointer to a malloc'ed copy of the array SRC of NUM elements.
   SRC is parenthesized inside sizeof so that an expression argument such as
   `p + 1' is measured by its element type and not by the promoted type of
   `*p + 1'.  The value is the untyped pointer returned by memcpy.  */
# define CCLONE(Src, Num) \
  (memcpy (xmalloc (sizeof (*(Src)) * (Num)), (Src), sizeof (*(Src)) * (Num)))

/* Return a malloc'ed copy of SRC. */
# define CLONE(Src) CCLONE (Src, 1)

#endif