This commit is contained in:
Jamie Peabody
2013-01-05 20:06:33 +00:00
parent 5ddd7a5f3c
commit 2d0faa2b74
2 changed files with 0 additions and 691 deletions

View File

@@ -1,690 +0,0 @@
class MyersDiffer:
    """
    An implementation of Eugene Myers's O(ND) Diff algorithm based on GNU diff.
    """
    # Minimum length of an equal-line run to be considered a "big snake"
    # by the heuristics in _find_sms.
    SNAKE_LIMIT = 20

    # Discard states used by _discard_confusing_lines: a line is either
    # kept, found discardable, or provisionally discardable (may be
    # cancelled later).
    DISCARD_NONE = 0
    DISCARD_FOUND = 1
    DISCARD_CANCEL = 2

    # The Myers diff algorithm effectively turns the diff problem into a graph
    # search. It works by finding the "shortest middle snake," which is then
    # used to split the problem in half and recurse on each side.

    class DiffData:
        """Per-sequence bookkeeping: line codes and modification flags."""

        def __init__(self, data):
            self.data = data
            self.length = len(data)
            self.modified = {}
            self.undiscarded = []
            self.undiscarded_lines = 0
            self.real_indexes = []

        def is_modified(self, line):
            # Entries may be explicitly set to False (see _shift_chunks),
            # so key presence alone is not enough: the stored value must
            # itself be true. dict.get replaces the Python-2-only has_key.
            return self.modified.get(line, False)
def __init__(self, a, b, ignore_space=False):
if type(a) != type(b):
raise TypeError
self.a = a
self.b = b
self.code_table = {}
self.last_code = 0
self.a_data = self.b_data = None
self.ignore_space = ignore_space
self.minimal_diff = False
# SMS State
self.max_lines = 0
self.fdiag = None
self.bdiag = None
def ratio(self):
    """
    Return a similarity measure for the two sequences in [0.0, 1.0].

    1.0 means every line is unchanged, 0.0 that no line is shared.
    """
    self._gen_diff_data()

    total = self.a_data.length + self.b_data.length
    if total == 0:
        # Two empty sequences are trivially identical; avoid dividing
        # by zero.
        return 1.0

    # Count only entries whose flag is actually True: _shift_chunks
    # leaves keys explicitly set to False in the modified dicts, so a
    # plain len() would overcount the changed lines.
    a_modified = sum(1 for flag in self.a_data.modified.values() if flag)
    b_modified = sum(1 for flag in self.b_data.modified.values() if flag)

    a_equals = self.a_data.length - a_modified
    b_equals = self.b_data.length - b_modified

    return 1.0 * (a_equals + b_equals) / total
def get_opcodes(self):
    """
    Generator that returns opcodes representing the contents of the
    diff.

    The resulting opcodes are in the format of
    (tag, i1, i2, j1, j2)

    where tag is one of "equal", "insert", "delete" or "replace",
    and the ranges a[i1:i2] / b[j1:j2] are the affected lines in the
    old and new sequences. Adjacent groups that share a tag are
    coalesced before being yielded.
    """
    self._gen_diff_data()

    a_line = b_line = 0
    last_group = None

    # Go through the entire set of lines on both the old and new files
    while a_line < self.a_data.length or b_line < self.b_data.length:
        a_start = a_line
        b_start = b_line

        if a_line < self.a_data.length and \
           not self.a_data.is_modified(a_line) and \
           b_line < self.b_data.length and \
           not self.b_data.is_modified(b_line):
            # Equal
            a_changed = b_changed = 1
            tag = "equal"
            a_line += 1
            b_line += 1
        else:
            # Deleted, inserted or replaced

            # Count every old line that's been modified, and the
            # remainder of old lines if we've reached the end of the new
            # file.
            while a_line < self.a_data.length and \
                  (b_line >= self.b_data.length or \
                   self.a_data.is_modified(a_line)):
                a_line += 1

            # Count every new line that's been modified, and the
            # remainder of new lines if we've reached the end of the old
            # file.
            while b_line < self.b_data.length and \
                  (a_line >= self.a_data.length or \
                   self.b_data.is_modified(b_line)):
                b_line += 1

            a_changed = a_line - a_start
            b_changed = b_line - b_start

            # At least one side must have advanced, or we would loop
            # forever on the same position.
            assert a_start < a_line or b_start < b_line
            assert a_changed != 0 or b_changed != 0

            if a_changed == 0 and b_changed > 0:
                tag = "insert"
            elif a_changed > 0 and b_changed == 0:
                tag = "delete"
            elif a_changed > 0 and b_changed > 0:
                tag = "replace"

                # Trim an unbalanced replace down to equal-sized ranges;
                # the excess lines are pushed back and handled on the
                # next iteration as a separate insert/delete.
                if a_changed != b_changed:
                    if a_changed > b_changed:
                        a_line -= a_changed - b_changed
                    elif a_changed < b_changed:
                        b_line -= b_changed - a_changed

                    a_changed = b_changed = min(a_changed, b_changed)

        # Merge consecutive groups with the same tag; only emit a group
        # once the tag changes.
        if last_group and last_group[0] == tag:
            last_group = (tag,
                          last_group[1], last_group[2] + a_changed,
                          last_group[3], last_group[4] + b_changed)
        else:
            if last_group:
                yield last_group

            last_group = (tag, a_start, a_start + a_changed,
                          b_start, b_start + b_changed)

    # Degenerate case: nothing was produced above (e.g. empty inputs),
    # so emit a single covering "equal" group.
    if not last_group:
        last_group = ("equal", 0, self.a_data.length, 0, self.b_data.length)

    yield last_group
def _gen_diff_data(self):
    """
    Generate all the diff data needed to return opcodes or the diff ratio.

    This is only called once during the lifetime of a MyersDiffer instance.
    """
    # Already generated; this method is idempotent.
    if self.a_data and self.b_data:
        return

    self.a_data = self.DiffData(self._gen_diff_codes(self.a))
    self.b_data = self.DiffData(self._gen_diff_codes(self.b))

    self._discard_confusing_lines()

    # Diagonal vectors must hold every undiscarded line from both sides,
    # plus padding for the k-line offsets. The original computed this
    # identical expression twice for max_lines and vector_size.
    vector_size = self.a_data.undiscarded_lines + \
                  self.b_data.undiscarded_lines + 3
    self.max_lines = vector_size

    self.fdiag = [0] * vector_size
    self.bdiag = [0] * vector_size

    # Offset applied so negative k-lines index into the vectors safely.
    self.downoff = self.upoff = self.b_data.undiscarded_lines + 1

    self._lcs(0, self.a_data.undiscarded_lines,
              0, self.b_data.undiscarded_lines,
              self.minimal_diff)

    self._shift_chunks(self.a_data, self.b_data)
    self._shift_chunks(self.b_data, self.a_data)
def _gen_diff_codes(self, lines):
"""
Converts all unique lines of text into unique numbers. Comparing
lists of numbers is faster than comparing lists of strings.
"""
codes = []
for line in lines:
# TODO: Handle ignoring/triming spaces, ignoring casing, and
# special hooks
if self.ignore_space:
temp = line.lstrip()
# We still want to show lines that contain only whitespace.
if temp != "":
line = temp
if self.code_table.has_key(line):
code = self.code_table[line]
else:
# This is a new, unrecorded line, so mark it and store it.
self.last_code += 1
code = self.last_code
self.code_table[line] = code
codes.append(code)
return codes
def _find_sms(self, a_lower, a_upper, b_lower, b_upper, find_minimal):
    """
    Finds the Shortest Middle Snake.

    Searches the region a[a_lower:a_upper] x b[b_lower:b_upper]
    (indexes into the undiscarded line codes) simultaneously forward
    and backward until the two frontiers meet.

    Returns a tuple (x, y, low_minimal, high_minimal): the split point
    and whether each resulting half must be re-searched minimally by
    _lcs.
    """
    down_vector = self.fdiag  # The vector for the (0, 0) to (x, y) search
    up_vector = self.bdiag    # The vector for the (u, v) to (N, M) search

    down_k = a_lower - b_lower  # The k-line to start the forward search
    up_k = a_upper - b_upper    # The k-line to start the reverse search

    odd_delta = (down_k - up_k) % 2 != 0

    down_vector[self.downoff + down_k] = a_lower
    up_vector[self.upoff + up_k] = a_upper

    dmin = a_lower - b_upper
    dmax = a_upper - b_lower

    down_min = down_max = down_k
    up_min = up_max = up_k

    cost = 0
    max_cost = max(256, self._very_approx_sqrt(self.max_lines * 4))

    while True:
        cost += 1
        big_snake = False

        # Widen (or clamp) the band of diagonals explored by the
        # forward search, seeding fresh diagonals with sentinels.
        if down_min > dmin:
            down_min -= 1
            down_vector[self.downoff + down_min - 1] = -1
        else:
            down_min += 1

        if down_max < dmax:
            down_max += 1
            down_vector[self.downoff + down_max + 1] = -1
        else:
            down_max -= 1

        # Extend the forward path
        for k in range(down_max, down_min - 1, -2):
            tlo = down_vector[self.downoff + k - 1]
            thi = down_vector[self.downoff + k + 1]

            if tlo >= thi:
                x = tlo + 1
            else:
                x = thi

            y = x - k
            old_x = x

            # Find the end of the furthest reaching forward D-path in
            # diagonal k
            while x < a_upper and y < b_upper and \
                  self.a_data.undiscarded[x] == self.b_data.undiscarded[y]:
                x += 1
                y += 1

            if x - old_x > self.SNAKE_LIMIT:
                big_snake = True

            down_vector[self.downoff + k] = x

            # Overlap with the reverse frontier means we found the SMS.
            if odd_delta and up_min <= k <= up_max and \
               up_vector[self.upoff + k] <= x:
                return x, y, True, True

        # Widen (or clamp) the band for the reverse search.
        if up_min > dmin:
            up_min -= 1
            up_vector[self.upoff + up_min - 1] = self.max_lines
        else:
            up_min += 1

        if up_max < dmax:
            up_max += 1
            up_vector[self.upoff + up_max + 1] = self.max_lines
        else:
            up_max -= 1

        # Extend the reverse path
        for k in range(up_max, up_min - 1, -2):
            tlo = up_vector[self.upoff + k - 1]
            thi = up_vector[self.upoff + k + 1]

            if tlo < thi:
                x = tlo
            else:
                x = thi - 1

            y = x - k
            old_x = x

            while x > a_lower and y > b_lower and \
                  self.a_data.undiscarded[x - 1] == \
                  self.b_data.undiscarded[y - 1]:
                x -= 1
                y -= 1

            if old_x - x > self.SNAKE_LIMIT:
                big_snake = True

            up_vector[self.upoff + k] = x

            if not odd_delta and down_min <= k <= down_max and \
               x <= down_vector[self.downoff + k]:
                return x, y, True, True

        # A minimal search never takes the heuristic shortcuts below.
        if find_minimal:
            continue

        # Heuristics courtesy of GNU diff.
        #
        # We check occasionally for a diagonal that made lots of progress
        # compared with the edit distance. If we have one, find the one
        # that made the most progress and return it.
        #
        # This gives us better, more dense chunks, instead of lots of
        # small ones often starting with replaces. It also makes the output
        # closer to that of GNU diff, which more people would expect.
        if cost > 200 and big_snake:
            def find_diagonal(minimum, maximum, k, best, diagoff, vector,
                              vdiff_func, check_x_range, check_y_range,
                              discard_index, k_offset):
                for d in range(maximum, minimum - 1, -2):
                    dd = d - k
                    x = vector[diagoff + d]
                    y = x - d
                    v = vdiff_func(x) * 2 + dd

                    if v > 12 * (cost + abs(dd)):
                        if v > best and \
                           check_x_range(x) and check_y_range(y):
                            # We found a sufficient diagonal. Verify
                            # there's a snake of SNAKE_LIMIT equal lines
                            # adjacent to it, as GNU diff does. The
                            # original computed the indexes once before
                            # the loop, so it compared the same pair of
                            # lines on every iteration instead of
                            # walking the snake.
                            k = k_offset
                            while self.a_data.undiscarded[
                                      discard_index(x, k)] == \
                                  self.b_data.undiscarded[
                                      discard_index(y, k)]:
                                if k == self.SNAKE_LIMIT - 1 + k_offset:
                                    return x, y, v
                                k += 1

                return 0, 0, 0

            ret_x, ret_y, best = \
                find_diagonal(down_min, down_max, down_k, 0, self.downoff,
                              down_vector,
                              lambda x: x - a_lower,
                              lambda x: a_lower + self.SNAKE_LIMIT <=
                                        x < a_upper,
                              lambda y: b_lower + self.SNAKE_LIMIT <=
                                        y < b_upper,
                              lambda i, k: i - k,
                              1)

            if best > 0:
                return ret_x, ret_y, True, False

            ret_x, ret_y, best = \
                find_diagonal(up_min, up_max, up_k, best, self.upoff,
                              up_vector,
                              lambda x: a_upper - x,
                              lambda x: a_lower < x <= a_upper -
                                        self.SNAKE_LIMIT,
                              lambda y: b_lower < y <= b_upper -
                                        self.SNAKE_LIMIT,
                              lambda i, k: i + k,
                              0)

            if best > 0:
                return ret_x, ret_y, False, True

            continue  # XXX

        # If we've reached or gone past the max cost, just give up now
        # and report the halfway point between our best results.
        if cost >= max_cost:
            fx_best = bx_best = 0

            # Find the forward diagonal that maximized x + y
            fxy_best = -1
            for d in range(down_max, down_min - 1, -2):
                x = min(down_vector[self.downoff + d], a_upper)
                y = x - d

                if b_upper < y:
                    x = b_upper + d
                    y = b_upper

                if fxy_best < x + y:
                    fxy_best = x + y
                    fx_best = x

            # Find the backward diagonal that minimizes x + y
            bxy_best = self.max_lines
            for d in range(up_max, up_min - 1, -2):
                x = max(a_lower, up_vector[self.upoff + d])
                y = x - d

                if y < b_lower:
                    x = b_lower + d
                    y = b_lower

                if x + y < bxy_best:
                    bxy_best = x + y
                    # Fixed typo: this was "bx_bet = x", which left
                    # bx_best stuck at 0 and corrupted the backward
                    # fallback result below.
                    bx_best = x

            # Use the better of the two diagonals
            if a_upper + b_upper - bxy_best < \
               fxy_best - (a_lower + b_lower):
                return fx_best, fxy_best - fx_best, True, False
            else:
                return bx_best, bxy_best - bx_best, False, True

    raise Exception("The function should not have reached here.")
def _lcs(self, a_lower, a_upper, b_lower, b_upper, find_minimal):
    """
    The divide-and-conquer implementation of the Longest Common
    Subsequence (LCS) algorithm.

    Operates on the undiscarded line codes in the half-open ranges
    [a_lower, a_upper) and [b_lower, b_upper), marking changed lines
    in the modified dicts (keyed by real line index).
    """
    # Fast walkthrough equal lines at the start
    while a_lower < a_upper and b_lower < b_upper and \
          self.a_data.undiscarded[a_lower] == \
          self.b_data.undiscarded[b_lower]:
        a_lower += 1
        b_lower += 1

    # ... and equal lines at the end.
    while a_upper > a_lower and b_upper > b_lower and \
          self.a_data.undiscarded[a_upper - 1] == \
          self.b_data.undiscarded[b_upper - 1]:
        a_upper -= 1
        b_upper -= 1

    if a_lower == a_upper:
        # Inserted lines.
        while b_lower < b_upper:
            # real_indexes maps undiscarded positions back to original
            # line numbers.
            self.b_data.modified[self.b_data.real_indexes[b_lower]] = True
            b_lower += 1
    elif b_lower == b_upper:
        # Deleted lines
        while a_lower < a_upper:
            self.a_data.modified[self.a_data.real_indexes[a_lower]] = True
            a_lower += 1
    else:
        # Find the middle snake and length of an optimal path for A and B
        x, y, low_minimal, high_minimal = \
            self._find_sms(a_lower, a_upper, b_lower, b_upper,
                           find_minimal)

        # Recurse on the two halves on either side of the middle snake.
        self._lcs(a_lower, x, b_lower, y, low_minimal)
        self._lcs(x, a_upper, y, b_upper, high_minimal)
def _shift_chunks(self, data, other_data):
    """
    Shifts the inserts/deletes of identical lines in order to join
    the changes together a bit more. This has the effect of cleaning
    up the diff.

    Often times, a generated diff will have two identical lines before
    and after a chunk (say, a blank line). The default algorithm will
    insert at the front of that range and include two blank lines at the
    end, but that does not always produce the best looking diff. Since
    the two lines are identical, we can shift the chunk so that the line
    appears both before and after the line, rather than only after.

    `i` walks lines of `data`; `j` tracks the corresponding position in
    `other_data`.
    """
    i = j = 0
    i_end = data.length

    while True:
        # Scan forward in order to find the start of a run of changes.
        while i < i_end and not data.is_modified(i):
            i += 1

            # Keep j past any changed run in the other data set.
            while other_data.is_modified(j):
                j += 1

        if i == i_end:
            return;

        start = i

        # Find the end of these changes
        i += 1
        while data.is_modified(i):
            i += 1

        while other_data.is_modified(j):
            j += 1

        # Repeat until the run stops growing (do/while emulation; see
        # the run_length check at the bottom).
        while True:
            run_length = i - start

            # Move the changed chunks back as long as the previous
            # unchanged line matches the last changed line.
            # This merges with the previous changed chunks.
            while start != 0 and data.data[start - 1] == data.data[i - 1]:
                start -= 1
                i -= 1

                # Note: marks `i` with an explicit False entry rather
                # than removing the key.
                data.modified[start] = True
                data.modified[i] = False

                # Absorb any changed run immediately before the new
                # start.
                while data.is_modified(start - 1):
                    start -= 1

                j -= 1
                while other_data.is_modified(j):
                    j -= 1

            # The end of the changed run at the last point where it
            # corresponds to the changed run in the other data set.
            # If it's equal to i_end, then we didn't find a corresponding
            # point.
            if other_data.is_modified(j - 1):
                corresponding = i
            else:
                corresponding = i_end

            # Move the changed region forward as long as the first
            # changed line is the same as the following unchanged line.
            while i != i_end and data.data[start] == data.data[i]:
                data.modified[start] = False
                data.modified[i] = True
                start += 1
                i += 1

                # Absorb any changed run immediately after the new end.
                while data.is_modified(i):
                    i += 1

                j += 1
                while other_data.is_modified(j):
                    j += 1
                    corresponding = i

            if run_length == i - start:
                break

        # Move the fully-merged run back to a corresponding run in the
        # other data set, if we can.
        while corresponding < i:
            start -= 1
            i -= 1

            data.modified[start] = True
            data.modified[i] = False

            j -= 1
            while other_data.is_modified(j):
                j -= 1
def _discard_confusing_lines(self):
    """
    Filter out lines that would make the diff noisy before running the
    main algorithm.

    A line is discardable when it has no match at all in the other
    sequence; lines that occur very many times are only provisionally
    discardable and may be cancelled by the run heuristics below.
    The surviving lines are copied into each DiffData's `undiscarded`
    list, with `real_indexes` mapping back to original line numbers;
    discarded lines are pre-marked as modified.
    """
    def build_discard_list(data, discards, counts):
        """Mark each line as FOUND (no match in the other side) or
        CANCEL (provisional: occurs more than `many` times there)."""
        # Floor division keeps this integral under Python 3 as well.
        many = 5 * self._very_approx_sqrt(data.length // 64)

        for i, item in enumerate(data.data):
            if item != 0:
                num_matches = counts[item]

                if num_matches == 0:
                    discards[i] = self.DISCARD_FOUND
                elif num_matches > many:
                    discards[i] = self.DISCARD_CANCEL

    def scan_run(discards, i, length, index_func):
        """Scan from one end of a run, cancelling provisional discards
        until hitting 3 consecutive definite discards, or the first
        definite discard at least 8 lines in."""
        consec = 0

        for j in range(length):
            index = index_func(i, j)
            discard = discards[index]

            if j >= 8 and discard == self.DISCARD_FOUND:
                break

            if discard == self.DISCARD_FOUND:
                consec += 1
            else:
                consec = 0

                if discard == self.DISCARD_CANCEL:
                    discards[index] = self.DISCARD_NONE

            if consec == 3:
                break

    def check_discard_runs(data, discards):
        i = 0

        while i < data.length:
            # Cancel the provisional discards that are not in the middle
            # of a run of discards
            if discards[i] == self.DISCARD_CANCEL:
                discards[i] = self.DISCARD_NONE
            elif discards[i] == self.DISCARD_FOUND:
                # We found a provisional discard
                provisional = 0

                # Find the end of this run of discardable lines and count
                # how many are provisionally discardable.
                j = i
                while j < data.length:
                    if discards[j] == self.DISCARD_NONE:
                        break
                    elif discards[j] == self.DISCARD_CANCEL:
                        provisional += 1

                    j += 1

                # Cancel the provisional discards at the end and shrink
                # the run.
                while j > i and discards[j - 1] == self.DISCARD_CANCEL:
                    j -= 1
                    # Use the named constant instead of a raw 0, for
                    # consistency with the rest of this method.
                    discards[j] = self.DISCARD_NONE
                    provisional -= 1

                length = j - i

                # If 1/4 of the lines are provisional, cancel discarding
                # all the provisional lines in the run.
                if provisional * 4 > length:
                    while j > i:
                        j -= 1

                        if discards[j] == self.DISCARD_CANCEL:
                            discards[j] = self.DISCARD_NONE
                else:
                    minimum = 1 + self._very_approx_sqrt(length // 4)
                    j = 0
                    consec = 0

                    # Cancel any subrun of `minimum` or more provisional
                    # discards inside the larger run.
                    while j < length:
                        if discards[i + j] != self.DISCARD_CANCEL:
                            consec = 0
                        else:
                            consec += 1

                            if minimum == consec:
                                # Back up to the start of the subrun to
                                # cancel all of it.
                                j -= consec
                            elif minimum < consec:
                                discards[i + j] = self.DISCARD_NONE

                        j += 1

                    # Trim provisional discards from both ends of the
                    # run.
                    scan_run(discards, i, length, lambda x, y: x + y)
                    i += length - 1
                    scan_run(discards, i, length, lambda x, y: x - y)

            i += 1

    def discard_lines(data, discards):
        """Compact undiscarded lines; pre-mark discarded ones as
        modified."""
        j = 0

        for i, item in enumerate(data.data):
            if self.minimal_diff or discards[i] == self.DISCARD_NONE:
                data.undiscarded[j] = item
                data.real_indexes[j] = i
                j += 1
            else:
                data.modified[i] = True

        data.undiscarded_lines = j

    self.a_data.undiscarded = [0] * self.a_data.length
    self.b_data.undiscarded = [0] * self.b_data.length

    self.a_data.real_indexes = [0] * self.a_data.length
    self.b_data.real_indexes = [0] * self.b_data.length

    a_discarded = [0] * self.a_data.length
    b_discarded = [0] * self.b_data.length

    # Count occurrences of every line code on each side.
    a_code_counts = [0] * (1 + self.last_code)
    b_code_counts = [0] * (1 + self.last_code)

    for item in self.a_data.data:
        a_code_counts[item] += 1

    for item in self.b_data.data:
        b_code_counts[item] += 1

    # Each side is scored against the other side's counts.
    build_discard_list(self.a_data, a_discarded, b_code_counts)
    build_discard_list(self.b_data, b_discarded, a_code_counts)

    check_discard_runs(self.a_data, a_discarded)
    check_discard_runs(self.b_data, b_discarded)

    discard_lines(self.a_data, a_discarded)
    discard_lines(self.b_data, b_discarded)
def _very_approx_sqrt(self, i):
result = 1
i /= 4
while i > 0:
i /= 4
result *= 2
return result

View File

@@ -11,7 +11,6 @@
/* Mergely pane chrome: grey column borders, with the active column and
   the start/end rows of a change highlighted in blue (#4ba3fa). */
.mergely-column { border: 1px solid #ccc; }
.mergely-active { border: 1px solid #4ba3fa; }
.mergely.a.rhs.start { border-top: 1px solid #4ba3fa; }
.mergely.a.lhs.start.end,
.mergely.a.rhs.end { border-bottom: 1px solid #4ba3fa; }