1   // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file

2   // for details. All rights reserved. Use of this source code is governed by a

3   // BSDstyle license that can be found in the LICENSE file.

4  

5   library dart_style.src.line_splitting.line_splitter;

6  

7   import '../chunk.dart';

8   import '../debug.dart' as debug;

9   import '../line_writer.dart';

10   import '../rule/rule.dart';

11   import 'rule_set.dart';

12   import 'solve_state.dart';

13   import 'solve_state_queue.dart';

14  

15   /// To ensure the solver doesn't go totally pathological on giant code, we cap

16   /// it at a fixed number of attempts.

17   ///

18   /// If the optimal solution isn't found after this many tries, it just uses the

19   /// best it found so far.

20   const _maxAttempts = 5000;

21  

22   /// Takes a set of chunks and determines the best values for its rules in order

23   /// to fit it inside the page boundary.

24   ///

25   /// This problem is exponential in the number of rules and a single expression

26   /// in Dart can be quite large, so it isn't feasible to brute force this. For

27   /// example:

28   ///

29   /// outer(

30   /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),

31   /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),

32   /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),

33   /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8));

34   ///

35   /// There are 509,607,936 ways this can be split.

36   ///

37   /// The problem is even harder because we may not be able to easily tell if a

38   /// given solution is the best one. It's possible that there is *no* solution

39   /// that fits in the page (due to long strings or identifiers) so the winning

40   /// solution may still have overflow characters. This makes it hard to know

41   /// when we are done and can stop looking.

42   ///

43   /// There are a couple of pieces of domain knowledge we use to cope with this:

44   ///

45   ///  Changing a rule from unsplit to split will never lower its cost. A

46   /// solution with all rules unsplit will always be the one with the lowest

47   /// cost (zero). Conversely, setting all of its rules to the maximum split

48   /// value will always have the highest cost.

49   ///

50   /// (You might think there is a converse rule about overflow characters. The

51   /// solution with the fewest splits will have the most overflow, and the

52   /// solution with the most splits will have the least overflow. Alas, because

53   /// of indentation, that isn't always the case. Adding a split may *increase*

54   /// overflow in some cases.)

55   ///

56   ///  If all of the chunks for a rule are inside lines that already fit in the

57   /// page, then splitting that rule will never improve the solution.

58   ///

59   ///  If two partial solutions have the same cost and the bound rules don't

60   /// affect any of the remaining unbound rules, then whichever partial

61   /// solution is currently better will always be the winner regardless of what

62   /// the remaining unbound rules are bound to.

63   ///

64   /// We start off with a [SolveState] where all rules are unbound (which

65   /// implicitly treats them as unsplit). For a given solve state, we can produce

66   /// a set of expanded states that takes some of the rules in the first long

67   /// line and bind them to split values. This always produces new solve states

68   /// with higher cost (but often fewer overflow characters) than the parent

69   /// state.

70   ///

71   /// We take these expanded states and add them to a work list sorted by cost.

72   /// Since unsplit rules always have lower cost solutions, we know that no state

73   /// we enqueue later will ever have a lower cost than the ones we already have

74   /// enqueued.

75   ///

76   /// Then we keep pulling states off the work list and expanding them and adding

77   /// the results back into the list. We do this until we hit a solution where

78   /// all characters fit in the page. The first one we find will have the lowest

79   /// cost and we're done.

80   ///

81   /// We also keep running track of the best solution we've found so far that

82   /// has the fewest overflow characters and the lowest cost. If no solution fits,

83   /// we'll use this one.

84   ///

85   /// When enqueing a solution, we can sometimes collapse it and a previously

86   /// queued one by preferring one or the other. If two solutions have the same

87   /// cost and we can prove that they won't diverge later as unbound rules are

88   /// set, we can pick the winner now and discard the other. This lets us avoid

89   /// redundantly exploring entire subtrees of the solution space.

90   ///

91   /// As a final escape hatch for pathologically nasty code, after trying some

92   /// fixed maximum number of solve states, we just bail and return the best

93   /// solution found so far.

94   ///

95   /// Even with the above algorithmic optimizations, complex code may still

96   /// require a lot of exploring to find an optimal solution. To make that fast,

97   /// this code is carefully profiled and optimized. If you modify this, make

98   /// sure to test against the benchmark to ensure you don't regress performance.

99   class LineSplitter {

100   final LineWriter writer;

101  

102   /// The list of chunks being split.

103   final List<Chunk> chunks;

104  

105   /// The set of soft rules whose values are being selected.

106   final List<Rule> rules;

107  

108   /// The number of characters of additional indentation to apply to each line.

109   ///

110   /// This is used when formatting blocks to get the output into the right

111   /// column based on where the block appears.

112   final int blockIndentation;

113  

114   /// The starting column of the first line.

115   final int firstLineIndent;

116  

117   /// The queue of solve states to explore further.

118   ///

119   /// This is sorted lowestcost first. This ensures that as soon as we find a

120   /// solution that fits in the page, we know it will be the lowest cost one

121   /// and can stop looking.

122   final _queue = new SolveStateQueue();

123  

124   /// The lowest cost solution found so far.

125   SolveState _bestSolution;

126  

127   /// Creates a new splitter for [_writer] that tries to fit [chunks] into the

128   /// page width.

129  3  LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation,

130   int firstLineIndent,

131   {bool flushLeft: false})

132   : chunks = chunks,

133   // Collect the set of rules that we need to select values for.

134   rules = chunks

135  9  .map((chunk) => chunk.rule)

136  6  .where((rule) => rule != null)

137  3  .toSet()

138  3  .toList(growable: false),

139   blockIndentation = blockIndentation,

140  3  firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation {

141  6  _queue.bindSplitter(this);

142  

143   // Store the rule's index in the rule so we can get from a chunk to a rule

144   // index quickly.

145  12  for (var i = 0; i < rules.length; i++) {

146  9  rules[i].index = i;

147   }

148  

149   // Now that every used rule has an index, tell the rules to discard any

150   // constraints on unindexed rules.

151  6  for (var rule in rules) {

152  3  rule.forgetUnusedRules();

153   }

154   }

155  

156   /// Determine the best way to split the chunks into lines that fit in the

157   /// page, if possible.

158   ///

159   /// Returns a [SplitSet] that defines where each split occurs and the

160   /// indentation of each line.

161   ///

162   /// [firstLineIndent] is the number of characters of whitespace to prefix the

163   /// first line of output with.

164  3  SplitSet apply() {

165   // Start with a completely unbound, unsplit solution.

166  18  _queue.add(new SolveState(this, new RuleSet(rules.length)));

167  

168   var attempts = 0;

169  6  while (_queue.isNotEmpty) {

170  6  var state = _queue.removeFirst();

171  

172  6  if (state.isBetterThan(_bestSolution)) {

173  3  _bestSolution = state;

174  

175   // Since we sort solutions by cost the first solution we find that

176   // fits is the winner.

177  9  if (_bestSolution.overflowChars == 0) break;

178   }

179  

180   if (debug.traceSplitter) {

181  0  var best = state == _bestSolution ? " (best)" : "";

182  0  debug.log("$state$best");

183  0  debug.dumpLines(chunks, firstLineIndent, state.splits);

184  0  debug.log();

185   }

186  

187  4  if (attempts++ > _maxAttempts) break;

188  

189   // Try bumping the rule values for rules whose chunks are on long lines.

190  2  state.expand();

191   }

192  

193   if (debug.traceSplitter) {

194  0  debug.log("$_bestSolution (winner)");

195  0  debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits);

196  0  debug.log();

197   }

198  

199  6  return _bestSolution.splits;

200   }

201  

202  2  void enqueue(SolveState state) {

203  4  _queue.add(state);

204   }

205   }
