1 | | // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
|
2 | | // for details. All rights reserved. Use of this source code is governed by a
|
3 | | // BSD-style license that can be found in the LICENSE file.
|
4 | |
|
5 | | library dart_style.src.line_splitting.line_splitter;
|
6 | |
|
7 | | import '../chunk.dart';
|
8 | | import '../debug.dart' as debug;
|
9 | | import '../line_writer.dart';
|
10 | | import '../rule/rule.dart';
|
11 | | import 'rule_set.dart';
|
12 | | import 'solve_state.dart';
|
13 | | import 'solve_state_queue.dart';
|
14 | |
|
15 | | /// To ensure the solver doesn't go totally pathological on giant code, we cap
|
16 | | /// it at a fixed number of attempts.
|
17 | | ///
|
18 | | /// If the optimal solution isn't found after this many tries, it just uses the
|
19 | | /// best it found so far.
|
20 | | const _maxAttempts = 5000;
|
21 | |
|
22 | | /// Takes a set of chunks and determines the best values for its rules in order
|
23 | | /// to fit it inside the page boundary.
|
24 | | ///
|
25 | | /// This problem is exponential in the number of rules and a single expression
|
26 | | /// in Dart can be quite large, so it isn't feasible to brute force this. For
|
27 | | /// example:
|
28 | | ///
|
29 | | /// outer(
|
30 | | /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
|
31 | | /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
|
32 | | /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
|
33 | | /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8));
|
34 | | ///
|
35 | | /// There are 509,607,936 ways this can be split.
|
36 | | ///
|
37 | | /// The problem is even harder because we may not be able to easily tell if a
|
38 | | /// given solution is the best one. It's possible that there is *no* solution
|
39 | | /// that fits in the page (due to long strings or identifiers) so the winning
|
40 | | /// solution may still have overflow characters. This makes it hard to know
|
41 | | /// when we are done and can stop looking.
|
42 | | ///
|
43 | | /// There are a couple of pieces of domain knowledge we use to cope with this:
|
44 | | ///
|
45 | | /// - Changing a rule from unsplit to split will never lower its cost. A
|
46 | | /// solution with all rules unsplit will always be the one with the lowest
|
47 | | /// cost (zero). Conversely, setting all of its rules to the maximum split
|
48 | | /// value will always have the highest cost.
|
49 | | ///
|
50 | | /// (You might think there is a converse rule about overflow characters. The
|
51 | | /// solution with the fewest splits will have the most overflow, and the
|
52 | | /// solution with the most splits will have the least overflow. Alas, because
|
53 | | /// of indentation, that isn't always the case. Adding a split may *increase*
|
54 | | /// overflow in some cases.)
|
55 | | ///
|
56 | | /// - If all of the chunks for a rule are inside lines that already fit in the
|
57 | | /// page, then splitting that rule will never improve the solution.
|
58 | | ///
|
59 | | /// - If two partial solutions have the same cost and the bound rules don't
|
60 | | /// affect any of the remaining unbound rules, then whichever partial
|
61 | | /// solution is currently better will always be the winner regardless of what
|
62 | | /// the remaining unbound rules are bound to.
|
63 | | ///
|
64 | | /// We start off with a [SolveState] where all rules are unbound (which
|
65 | | /// implicitly treats them as unsplit). For a given solve state, we can produce
|
66 | | /// a set of expanded states that takes some of the rules in the first long
|
67 | | /// line and bind them to split values. This always produces new solve states
|
68 | | /// with higher cost (but often fewer overflow characters) than the parent
|
69 | | /// state.
|
70 | | ///
|
71 | | /// We take these expanded states and add them to a work list sorted by cost.
|
72 | | /// Since unsplit rules always have lower cost solutions, we know that no state
|
73 | | /// we enqueue later will ever have a lower cost than the ones we already have
|
74 | | /// enqueued.
|
75 | | ///
|
76 | | /// Then we keep pulling states off the work list and expanding them and adding
|
77 | | /// the results back into the list. We do this until we hit a solution where
|
78 | | /// all characters fit in the page. The first one we find will have the lowest
|
79 | | /// cost and we're done.
|
80 | | ///
|
81 | | /// We also keep running track of the best solution we've found so far that
|
82 | | /// has the fewest overflow characters and the lowest cost. If no solution fits,
|
83 | | /// we'll use this one.
|
84 | | ///
|
85 | | /// When enqueing a solution, we can sometimes collapse it and a previously
|
86 | | /// queued one by preferring one or the other. If two solutions have the same
|
87 | | /// cost and we can prove that they won't diverge later as unbound rules are
|
88 | | /// set, we can pick the winner now and discard the other. This lets us avoid
|
89 | | /// redundantly exploring entire subtrees of the solution space.
|
90 | | ///
|
91 | | /// As a final escape hatch for pathologically nasty code, after trying some
|
92 | | /// fixed maximum number of solve states, we just bail and return the best
|
93 | | /// solution found so far.
|
94 | | ///
|
95 | | /// Even with the above algorithmic optimizations, complex code may still
|
96 | | /// require a lot of exploring to find an optimal solution. To make that fast,
|
97 | | /// this code is carefully profiled and optimized. If you modify this, make
|
98 | | /// sure to test against the benchmark to ensure you don't regress performance.
|
99 | | class LineSplitter {
|
100 | | final LineWriter writer;
|
101 | |
|
102 | | /// The list of chunks being split.
|
103 | | final List<Chunk> chunks;
|
104 | |
|
105 | | /// The set of soft rules whose values are being selected.
|
106 | | final List<Rule> rules;
|
107 | |
|
108 | | /// The number of characters of additional indentation to apply to each line.
|
109 | | ///
|
110 | | /// This is used when formatting blocks to get the output into the right
|
111 | | /// column based on where the block appears.
|
112 | | final int blockIndentation;
|
113 | |
|
114 | | /// The starting column of the first line.
|
115 | | final int firstLineIndent;
|
116 | |
|
117 | | /// The queue of solve states to explore further.
|
118 | | ///
|
119 | | /// This is sorted lowest-cost first. This ensures that as soon as we find a
|
120 | | /// solution that fits in the page, we know it will be the lowest cost one
|
121 | | /// and can stop looking.
|
122 | | final _queue = new SolveStateQueue();
|
123 | |
|
124 | | /// The lowest cost solution found so far.
|
125 | | SolveState _bestSolution;
|
126 | |
|
127 | | /// Creates a new splitter for [_writer] that tries to fit [chunks] into the
|
128 | | /// page width.
|
129 | 3 | LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation,
|
130 | | int firstLineIndent,
|
131 | | {bool flushLeft: false})
|
132 | | : chunks = chunks,
|
133 | | // Collect the set of rules that we need to select values for.
|
134 | | rules = chunks
|
135 | 9 | .map((chunk) => chunk.rule)
|
136 | 6 | .where((rule) => rule != null)
|
137 | 3 | .toSet()
|
138 | 3 | .toList(growable: false),
|
139 | | blockIndentation = blockIndentation,
|
140 | 3 | firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation {
|
141 | 6 | _queue.bindSplitter(this);
|
142 | |
|
143 | | // Store the rule's index in the rule so we can get from a chunk to a rule
|
144 | | // index quickly.
|
145 | 12 | for (var i = 0; i < rules.length; i++) {
|
146 | 9 | rules[i].index = i;
|
147 | | }
|
148 | |
|
149 | | // Now that every used rule has an index, tell the rules to discard any
|
150 | | // constraints on unindexed rules.
|
151 | 6 | for (var rule in rules) {
|
152 | 3 | rule.forgetUnusedRules();
|
153 | | }
|
154 | | }
|
155 | |
|
156 | | /// Determine the best way to split the chunks into lines that fit in the
|
157 | | /// page, if possible.
|
158 | | ///
|
159 | | /// Returns a [SplitSet] that defines where each split occurs and the
|
160 | | /// indentation of each line.
|
161 | | ///
|
162 | | /// [firstLineIndent] is the number of characters of whitespace to prefix the
|
163 | | /// first line of output with.
|
164 | 3 | SplitSet apply() {
|
165 | | // Start with a completely unbound, unsplit solution.
|
166 | 18 | _queue.add(new SolveState(this, new RuleSet(rules.length)));
|
167 | |
|
168 | | var attempts = 0;
|
169 | 6 | while (_queue.isNotEmpty) {
|
170 | 6 | var state = _queue.removeFirst();
|
171 | |
|
172 | 6 | if (state.isBetterThan(_bestSolution)) {
|
173 | 3 | _bestSolution = state;
|
174 | |
|
175 | | // Since we sort solutions by cost the first solution we find that
|
176 | | // fits is the winner.
|
177 | 9 | if (_bestSolution.overflowChars == 0) break;
|
178 | | }
|
179 | |
|
180 | | if (debug.traceSplitter) {
|
181 | 0 | var best = state == _bestSolution ? " (best)" : "";
|
182 | 0 | debug.log("$state$best");
|
183 | 0 | debug.dumpLines(chunks, firstLineIndent, state.splits);
|
184 | 0 | debug.log();
|
185 | | }
|
186 | |
|
187 | 4 | if (attempts++ > _maxAttempts) break;
|
188 | |
|
189 | | // Try bumping the rule values for rules whose chunks are on long lines.
|
190 | 2 | state.expand();
|
191 | | }
|
192 | |
|
193 | | if (debug.traceSplitter) {
|
194 | 0 | debug.log("$_bestSolution (winner)");
|
195 | 0 | debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits);
|
196 | 0 | debug.log();
|
197 | | }
|
198 | |
|
199 | 6 | return _bestSolution.splits;
|
200 | | }
|
201 | |
|
202 | 2 | void enqueue(SolveState state) {
|
203 | 4 | _queue.add(state);
|
204 | | }
|
205 | | }
|