001 /*
002 * Cursor.java
003 * Copyright (c) 1998-2008, The University of Sheffield.
004 *
005 * This code is from the GATE project (http://gate.ac.uk/) and is free
006 * software licenced under the GNU General Public License version 3. It is
007 * distributed without any warranty. For more details see COPYING.txt in the
008 * top level directory (or at http://gatewiki.sf.net/COPYING.txt).
009 *
010 * Hamish Cunningham 24th Jan 2009
011 */
012
013 package gate.yam.translate;
014
015 import java.util.*;
016 import java.io.*;
017 import org.springframework.util.StringUtils;
018 import org.apache.log4j.Logger;
019 import gate.util.*;
020 import gate.yam.parse.*;
021 import static gate.yam.translate.NodeKind.*;
022
023 /**
024 * Helper class for pretty-printer. Represents a position in a tree
025 * traversal, and provides manipulation of spacing at that position.
026 * @author Hamish Cunningham
027 */
028 public class Cursor {
/**
 * Construction from parse tree root node. All of the work is handed to
 * init(SimpleNode, int, int).
 *
 * @param rootNode root of the parse tree to traverse
 * @param leadingMin forwarded unchanged to init
 * @param leadingMax forwarded unchanged to init
 */
public Cursor(SimpleNode rootNode, int leadingMin, int leadingMax) {
  this.init(rootNode, leadingMin, leadingMax);
} // Cursor(SimpleNode, int, int)
033
034 /**
035 * Initialisation from parse tree root node. Deals with initial blank
036 * lines, and advances.
037 */
038 void init(SimpleNode rootNode, int leadingMin, int leadingMax) {
039 this.rootNode = rootNode;
040
041 // check that the tree isn't empty
042 if(rootNode == null) { // not much to be done...
043 kind = NodeKind.NULL;
044 return;
045 }
046 numChildren = rootNode.jjtGetNumChildren();
047 if(numChildren == 0) { // nothing in it...
048 kind = NodeKind.NULL;
049 return;
050 }
051
052 // deal with Seps immediately after the title
053 // without changing the cursor (this is done by advance later)
054 SimpleNode child = null;
055 NodeKind childKind = null;
056 LinkedList<ASTSep> sepList = new LinkedList<ASTSep>();
057 do { // make a list of leading Seps
058 child = (SimpleNode) rootNode.jjtGetChild(nextChildNumber++);
059 childKind = findNodeKind(child);
060 if(childKind == SEP)
061 sepList.add((ASTSep) child);
062 } while(nextChildNumber < numChildren && childKind == SEP);
063 if(sepList.size() > 0) { // we got at least one Sep node
064 Token first = sepList.getFirst().getFirstToken();
065 Token last = sepList.getLast().getLastToken();
066 if(last == null) last = first;
067 if(first != null)
068 blanksAfter(first, last, leadingMin, leadingMin, leadingMax);
069 }
070
071 // advance to the first non-Sep
072 nextChildNumber = 0;
073 advance();
074 } // init(SimpleNode, int, int)
075
076 /**
077 * Advance to the next Unit or Word sequence.
078 * (Ignores top-level anchors.)
079 */
080 public void advance() {
081 // if there are no more children, we're finished
082 if(nextChildNumber == numChildren) {
083 log.debug("no more children");
084 previousKind = kind;
085 kind = nextKind = NodeKind.NULL;
086 hereNode = null;
087 return;
088 }
089
090 // previousKind
091 previousKind = kind;
092
093 // find the next Unit or Word (ignoring anchors at top level of units)
094 log.debug("finding next Unit or Word");
095 SimpleNode child = null;
096 SimpleNode previousNode = null;
097 NodeKind childKind = null;
098 do {
099 child = (SimpleNode) rootNode.jjtGetChild(nextChildNumber++);
100 childKind = findNodeKind(child);
101 } while(
102 nextChildNumber < numChildren &&
103 (childKind == SEP || childKind == UNIT_ANCHOR)
104 );
105
106 // no non-Seps?
107 if(
108 nextChildNumber == numChildren &&
109 (childKind == SEP || childKind == UNIT_ANCHOR)
110 ) {
111 log.debug("no non-Seps found");
112 kind = NodeKind.NULL;
113 return;
114 }
115
116 // hereNode, kind
117 hereNode = child;
118 kind = childKind;
119
120 // nextKind
121 nextKind = NodeKind.NULL;
122 int peekChildNumber = nextChildNumber;
123 while(peekChildNumber < numChildren) {
124 child = (SimpleNode) rootNode.jjtGetChild(peekChildNumber++);
125 nextKind = findNodeKind(child);
126 nextNode = child;
127 if(nextKind != SEP && nextKind != UNIT_ANCHOR)
128 break;
129 }
130 } // advance()
131
132 /** Type of the next node after the current Sep sequence. */
133 public NodeKind findNodeKind(SimpleNode n) {
134 log.debug("findNodeKind on n = " + n);
135 if(n == null)
136 return NodeKind.NULL;
137
138 Class nClass = n.getClass();
139 if(nClass == ASTSep.class) {
140 return NodeKind.SEP;
141 } else if(nClass == ASTWord.class) {
142 return NodeKind.WORD;
143 } else if(nClass == ASTUnit.class) {
144 Token t = n.getFirstToken();
145 if( t != null && t.image.trim().startsWith("%include(") )
146 return NodeKind.UNIT_INCLUDE;
147
148 // set the type dependent on the first daughter of the unit
149 int unitChildNumber = 0;
150 int unitNumChildren = n.jjtGetNumChildren();
151 SimpleNode firstDaughter = null;
152 if(unitChildNumber < unitNumChildren) {
153 firstDaughter = (SimpleNode) n.jjtGetChild(unitChildNumber++);
154 }
155 if(firstDaughter == null)
156 return NodeKind.NULL;
157 Class daughterClass = firstDaughter.getClass();
158 log.debug("first daughter class = " + daughterClass);
159
160 if(daughterClass == ASTSectionHead.class) {
161 return NodeKind.UNIT_SECTION;
162 } else if(daughterClass == ASTList.class) {
163 return NodeKind.UNIT_LIST;
164 } else if(daughterClass == ASTParagraph.class) {
165 return NodeKind.UNIT_PARA;
166 } else if(daughterClass == ASTContents.class) {
167 return NodeKind.UNIT_CONTENTS;
168 } else if(daughterClass == ASTAnchor.class) {
169 return NodeKind.UNIT_ANCHOR;
170 } else if(daughterClass == ASTVerbatim.class) {
171 return NodeKind.UNIT_VERBATIM;
172 } else if(daughterClass == ASTTable.class) {
173 return NodeKind.UNIT_TABLE;
174 }
175 }
176
177 log.error(
178 "found non-Sep/Word/Unit node at top level, class was: " + nClass
179 );
180 return NodeKind.NULL;
181 } // findNodeKind(SimpleNode)
182
183 /** Root node of the parse tree. */
184 SimpleNode rootNode;
185
186 /** Number of children below the root node. */
187 int numChildren = 0;
188
189 /** The next node after the current one. */
190 int nextChildNumber = 0;
191
192 /** First node in current position. */
193 SimpleNode hereNode;
194
195 /** First node in current position. */
196 public SimpleNode getHereNode() { return hereNode; }
197
198 /** Position type. */
199 NodeKind kind = NodeKind.NULL;
200
201 /** Position type. */
202 public NodeKind getKind() { return kind; }
203
204 /** Previous position type. */
205 public NodeKind previousKind = NodeKind.NULL;
206
207 /** Previous position type. */
208 public NodeKind getPreviousKind() { return previousKind; }
209
210 /** Next node from current position. */
211 SimpleNode nextNode;
212
213 /** Next node from current position. */
214 public SimpleNode getNextNode() { return nextNode; }
215
216 /** Position type. */
217 /** Next position type. */
218 NodeKind nextKind = NodeKind.NULL;
219
220 /** Next position type. */
221 public NodeKind getNextKind() { return nextKind; }
222
223 /** Finished the document? */
224 public boolean atEnd() {
225 return kind == NodeKind.NULL || nextKind == NodeKind.NULL;
226 } // atEnd()
227
228 /**
229 * Set the number of blank lines following this position.
230 */
231 public void blanksAfter(int min, int max) {
232 Token gapStart = hereNode.getLastToken();
233 Token gapEnd = nextNode.getFirstToken();
234 Token insertPoint = gapStart;
235 int numNewLines = 0;
236 boolean hitMax = false;
237
238 // for each gap token
239 for(Token t = gapStart; t != null; t = t.next) {
240
241 if(t.kind == YamParser.anchorStart) { // anchors
242 continue;
243 } else if(t.kind == YamParser.anchorBody) {
244 /* TODO
245 * maybe:
246 if(t.next != null && t.next.image.startsWith("\n"))
247 numNewLines--; // allow an extra nl after an anchor
248 * or maybe only if prev tok is newline too...
249 * or maybe not at all!
250 */
251 continue;
252 } else if(t.samePlace(gapEnd)) { // next element
253 if(t.next != null && t.next.image.startsWith("\n")) {
254 numNewLines++; // add the line at the start of sections or lists
255 if(numNewLines > max) {
256 // reduce the newlines at insertPoint by 1
257 insertPoint.image = insertPoint.image.replaceFirst("\n", "");
258 }
259 }
260 break;
261 } else if( // comments
262 t.specialToken != null &&
263 t.specialToken.kind == YamParser.singleLineComment
264 ) {
265 continue;
266 } else { // spaces
267 insertPoint = t;
268 int tLines = StringUtils.countOccurrencesOf(t.image, "\n");
269
270 // deal with the newlines in t
271 if(hitMax) { // remove all newlines in t
272 t.image = t.image.replaceAll("\n", "");
273 } else if(numNewLines + tLines > max) { // reduce lines in t to max
274 int surplus = (numNewLines + tLines) - max;
275 for(int i = 0; i < surplus; i++)
276 t.image = t.image.replaceFirst("\n", "");
277
278 hitMax = true;
279 numNewLines = max;
280 } else { // add tLines to total
281 numNewLines += tLines;
282 }
283 }
284 } // for each gap token
285
286 if(numNewLines < min) { // add extra spaces at end of insertPoint.image
287 int missing = min - numNewLines;
288 for(int i = 0; i < missing; i++)
289 insertPoint.image = insertPoint.image.concat("\n");
290 }
291 /* TODO
292 * maybe something like:
293 if(DEBUG) {
294 gapEnd.image = gapEnd.image +
295 "\n\n" + kind + " norm=" + norm + " min=" + min +
296 " max=" + max + " current=" + numNewLines + " next=" +
297 nextKind + " (prev=" + previousKind + ")" +
298 "\ngapStart=" + sedNls(savedStart.image) + " gapEnd=" + sedNls(gapEnd.image) +
299 "\ngapStart=" + tokDetails(savedStart) + " gapEnd=" + tokDetails(gapEnd) +
300 "\ngapEnd.next.image=" + sedNls(gapEnd.next.image) + " s=" + sedNls(s.toString()) +
301 " next details=" + tokDetails(gapEnd.next) +
302 "\n\n";
303 return;
304 }
305 */
306
307 } // blanksAfter(int, int)
308
309 /**
310 * Set the number of blank lines following this position.
311 */
312 public void blanksAfter(int norm, int min, int max) {
313 blanksAfter(min, max);
314 if(true) return;
315
316
317
318 Token gapStart = hereNode.getFirstToken();
319 Token gapEnd = hereNode.getLastToken();
320
321 if(gapStart == null && gapEnd == null) {
322 log.debug("null gap start and end");
323 return;
324 } else if(gapStart == null) {
325 log.debug("null gap start");
326 gapStart = gapEnd;
327 } else if(gapEnd == null) {
328 log.debug("null gap end");
329 gapEnd = gapStart;
330 }
331
332 if(gapStart == null) {
333 log.debug("fatal null gap start");
334 return;
335 }
336
337 List<Token> tokens = new ArrayList<Token>();
338 do {
339 tokens.add(gapStart);
340 gapStart = gapStart.next;
341 } while(gapStart != gapEnd && gapStart != null);
342 ListIterator<Token> i = tokens.listIterator(tokens.size());
343 while(i.hasPrevious() && ! StringUtils.hasText(gapStart.image))
344 gapStart = i.previous();
345
346 blanksAfter(gapStart, gapEnd, norm, min, max);
347 } // blanksAfter(int,int,int)
348
349 /**
350 * Set the number of blank lines in a token sequence.
351 * Tokens gapStart and gapEnd must not be null.
352 */
353 public void blanksAfter(Token gapStart, Token gapEnd, int norm, int min, int max) {
354 // TODO
355 // - make the numbers equal blank lines, not newlines? (and change PT.fU2)
356
357 log.debug("blanksAfter: " + norm);
358 Token savedStart = gapStart;
359 Token t = gapStart;
360
361 // how many newlines in this gap?
362 int numNewLines = 0;
363 StringBuilder s = new StringBuilder();
364 s.append(t.image);
365 numNewLines += StringUtils.countOccurrencesOf(t.image, "\n");
366 while(t != null && ! Token.samePlace(t, gapEnd)) {
367 t = t.next;
368
369 // ignore single new lines on space-bearing tokens that have comments
370 // (we assume that these are related to the comments themselves)
371 if(t.specialToken != null) continue;
372
373 s.append(t.image);
374 numNewLines += StringUtils.countOccurrencesOf(t.image, "\n");
375 }
376
377 // deal with following leading space containing nl
378 t = gapEnd;
379 if(
380 t.next != null && t.next.image.startsWith("\n") &&
381 t.next.specialToken == null
382 ) {
383 numNewLines++;
384 s.append("\n");
385 }
386
387 if(DEBUG) {
388 gapEnd.image = gapEnd.image +
389 "\n\n" + kind + " norm=" + norm + " min=" + min +
390 " max=" + max + " current=" + numNewLines + " next=" +
391 nextKind + " (prev=" + previousKind + ")" +
392 "\ngapStart=" + sedNls(savedStart.image) + " gapEnd=" + sedNls(gapEnd.image) +
393 "\ngapStart=" + tokDetails(savedStart) + " gapEnd=" + tokDetails(gapEnd) +
394 "\ngapEnd.next.image=" + sedNls(gapEnd.next.image) + " s=" + sedNls(s.toString()) +
395 " next details=" + tokDetails(gapEnd.next) +
396 "\n\n";
397 return;
398 }
399
400 if(numNewLines >= min && numNewLines <= max)
401 return;
402
403 gapStart = savedStart;
404 while(gapStart != gapEnd && gapStart != null && gapEnd != null) {
405 gapStart.image = gapStart.image.replaceAll("\n", "");
406 gapStart = gapStart.next;
407 }
408 gapEnd.image = gapEnd.image.replaceAll("\n", "");
409 savedStart.image = savedStart.image + getBlanks(norm);
410 } // blanksAfter(Token,Token,int,int,int)
411
412 /** Helper */
413 String sedNls(String s) { return s.replaceAll("\n", "\\\\n"); }
414
415 /** Helper */
416 String tokDetails(Token t) {
417 String spec = "";
418 if(t.specialToken != null)
419 spec = t.specialToken.image;
420
421 if(t != null)
422 return
423 t.beginLine + " " + t.beginColumn + " " + t.endLine + " " + t.endColumn
424 + " (spec=" + spec + ")";
425 else
426 return "";
427 }
428
429 /**
430 * Get a newline string for a particular number of blanks.
431 * Note that 0 blanks is defined as equal to 1 newline; 1 blank is two
432 * newlines, and so on. (I.e. we assume that there's at least one newline at
433 * each point between units.)
434 */
435 String getBlanks(int i) {
436 switch(i) {
437 case 0: return "\n";
438 case 1: return "\n\n";
439 case 2: return "\n\n\n";
440 case 3: return "\n\n\n\n";
441 case 4: return "\n\n\n\n\n";
442 case 5: return "\n\n\n\n\n\n";
443 default: return "\n";
444 }
445 } // getBlanks(i)
446
447 /** Logger */
448 static Logger log =
449 Logger.getLogger("gate.yam.translate.Cursor");
450
451 /** Debug mode. */
452 static final boolean DEBUG = true;
453
454 } // Cursor
|