Coverage report for lib/src/line_splitting/line_splitter.dart

Line coverage: 23 / 30 (76.7%)

All files > lib/src/line_splitting/line_splitter.dart

1
// Copyright (c) 2015, the Dart project authors.  Please see the AUTHORS file
2
// for details. All rights reserved. Use of this source code is governed by a
3
// BSD-style license that can be found in the LICENSE file.
4
5
library dart_style.src.line_splitting.line_splitter;
6
7
import '../chunk.dart';
8
import '../debug.dart' as debug;
9
import '../line_writer.dart';
10
import '../rule/rule.dart';
11
import 'rule_set.dart';
12
import 'solve_state.dart';
13
import 'solve_state_queue.dart';
14
15
/// To ensure the solver doesn't go totally pathological on giant code, we cap
16
/// it at a fixed number of attempts.
17
///
18
/// If the optimal solution isn't found after this many tries, it just uses the
19
/// best it found so far.
20
const _maxAttempts = 5000;
21
22
/// Takes a set of chunks and determines the best values for its rules in order
23
/// to fit it inside the page boundary.
24
///
25
/// This problem is exponential in the number of rules and a single expression
26
/// in Dart can be quite large, so it isn't feasible to brute force this. For
27
/// example:
28
///
29
///     outer(
30
///         fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
31
///         fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
32
///         fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
33
///         fn(1 + 2, 3 + 4, 5 + 6, 7 + 8));
34
///
35
/// There are 509,607,936 ways this can be split.
36
///
37
/// The problem is even harder because we may not be able to easily tell if a
38
/// given solution is the best one. It's possible that there is *no* solution
39
/// that fits in the page (due to long strings or identifiers) so the winning
40
/// solution may still have overflow characters. This makes it hard to know
41
/// when we are done and can stop looking.
42
///
43
/// There are a couple of pieces of domain knowledge we use to cope with this:
44
///
45
/// - Changing a rule from unsplit to split will never lower its cost. A
46
///   solution with all rules unsplit will always be the one with the lowest
47
///   cost (zero). Conversely, setting all of its rules to the maximum split
48
///   value will always have the highest cost.
49
///
50
///   (You might think there is a converse rule about overflow characters. The
51
///   solution with the fewest splits will have the most overflow, and the
52
///   solution with the most splits will have the least overflow. Alas, because
53
///   of indentation, that isn't always the case. Adding a split may *increase*
54
///   overflow in some cases.)
55
///
56
/// - If all of the chunks for a rule are inside lines that already fit in the
57
///   page, then splitting that rule will never improve the solution.
58
///
59
/// - If two partial solutions have the same cost and the bound rules don't
60
///   affect any of the remaining unbound rules, then whichever partial
61
///   solution is currently better will always be the winner regardless of what
62
///   the remaining unbound rules are bound to.
63
///
64
/// We start off with a [SolveState] where all rules are unbound (which
65
/// implicitly treats them as unsplit). For a given solve state, we can produce
66
/// a set of expanded states that takes some of the rules in the first long
67
/// line and bind them to split values. This always produces new solve states
68
/// with higher cost (but often fewer overflow characters) than the parent
69
/// state.
70
///
71
/// We take these expanded states and add them to a work list sorted by cost.
72
/// Since unsplit rules always have lower cost solutions, we know that no state
73
/// we enqueue later will ever have a lower cost than the ones we already have
74
/// enqueued.
75
///
76
/// Then we keep pulling states off the work list and expanding them and adding
77
/// the results back into the list. We do this until we hit a solution where
78
/// all characters fit in the page. The first one we find will have the lowest
79
/// cost and we're done.
80
///
81
/// We also keep running track of the best solution we've found so far that
82
/// has the fewest overflow characters and the lowest cost. If no solution fits,
83
/// we'll use this one.
84
///
85
/// When enqueing a solution, we can sometimes collapse it and a previously
86
/// queued one by preferring one or the other. If two solutions have the same
87
/// cost and we can prove that they won't diverge later as unbound rules are
88
/// set, we can pick the winner now and discard the other. This lets us avoid
89
/// redundantly exploring entire subtrees of the solution space.
90
///
91
/// As a final escape hatch for pathologically nasty code, after trying some
92
/// fixed maximum number of solve states, we just bail and return the best
93
/// solution found so far.
94
///
95
/// Even with the above algorithmic optimizations, complex code may still
96
/// require a lot of exploring to find an optimal solution. To make that fast,
97
/// this code is carefully profiled and optimized. If you modify this, make
98
/// sure to test against the benchmark to ensure you don't regress performance.
99
class LineSplitter {
100
  final LineWriter writer;
101
102
  /// The list of chunks being split.
103
  final List<Chunk> chunks;
104
105
  /// The set of soft rules whose values are being selected.
106
  final List<Rule> rules;
107
108
  /// The number of characters of additional indentation to apply to each line.
109
  ///
110
  /// This is used when formatting blocks to get the output into the right
111
  /// column based on where the block appears.
112
  final int blockIndentation;
113
114
  /// The starting column of the first line.
115
  final int firstLineIndent;
116
117
  /// The queue of solve states to explore further.
118
  ///
119
  /// This is sorted lowest-cost first. This ensures that as soon as we find a
120
  /// solution that fits in the page, we know it will be the lowest cost one
121
  /// and can stop looking.
122
  final _queue = new SolveStateQueue();
123
124
  /// The lowest cost solution found so far.
125
  SolveState _bestSolution;
126
127
  /// Creates a new splitter for [_writer] that tries to fit [chunks] into the
128
  /// page width.
1293
  LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation,
130
      int firstLineIndent,
131
      {bool flushLeft: false})
132
      : chunks = chunks,
133
        // Collect the set of rules that we need to select values for.
134
        rules = chunks
1359
            .map((chunk) => chunk.rule)
1366
            .where((rule) => rule != null)
1373
            .toSet()
1383
            .toList(growable: false),
139
        blockIndentation = blockIndentation,
1403
        firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation {
1416
    _queue.bindSplitter(this);
142
143
    // Store the rule's index in the rule so we can get from a chunk to a rule
144
    // index quickly.
14512
    for (var i = 0; i < rules.length; i++) {
1469
      rules[i].index = i;
147
    }
148
149
    // Now that every used rule has an index, tell the rules to discard any
150
    // constraints on unindexed rules.
1516
    for (var rule in rules) {
1523
      rule.forgetUnusedRules();
153
    }
154
  }
155
156
  /// Determine the best way to split the chunks into lines that fit in the
157
  /// page, if possible.
158
  ///
159
  /// Returns a [SplitSet] that defines where each split occurs and the
160
  /// indentation of each line.
161
  ///
162
  /// [firstLineIndent] is the number of characters of whitespace to prefix the
163
  /// first line of output with.
1643
  SplitSet apply() {
165
    // Start with a completely unbound, unsplit solution.
16618
    _queue.add(new SolveState(this, new RuleSet(rules.length)));
167
168
    var attempts = 0;
1696
    while (_queue.isNotEmpty) {
1706
      var state = _queue.removeFirst();
171
1726
      if (state.isBetterThan(_bestSolution)) {
1733
        _bestSolution = state;
174
175
        // Since we sort solutions by cost the first solution we find that
176
        // fits is the winner.
1779
        if (_bestSolution.overflowChars == 0) break;
178
      }
179
180
      if (debug.traceSplitter) {
1810
        var best = state == _bestSolution ? " (best)" : "";
1820
        debug.log("$state$best");
1830
        debug.dumpLines(chunks, firstLineIndent, state.splits);
1840
        debug.log();
185
      }
186
1874
      if (attempts++ > _maxAttempts) break;
188
189
      // Try bumping the rule values for rules whose chunks are on long lines.
1902
      state.expand();
191
    }
192
193
    if (debug.traceSplitter) {
1940
      debug.log("$_bestSolution (winner)");
1950
      debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits);
1960
      debug.log();
197
    }
198
1996
    return _bestSolution.splits;
200
  }
201
2022
  void enqueue(SolveState state) {
2034
    _queue.add(state);
204
  }
205
}