001 /*
002 * PrettyTranslator.java
003 * Copyright (c) 1998-2008, The University of Sheffield.
004 *
005 * This code is from the GATE project (http://gate.ac.uk/) and is free
006 * software licenced under the GNU General Public License version 3. It is
007 * distributed without any warranty. For more details see COPYING.txt in the
008 * top level directory (or at http://gatewiki.sf.net/COPYING.txt).
009 *
010 * Hamish Cunningham 20th Jan 2009
011 */
012
013 package gate.yam.translate;
014
015 import java.util.*;
016 import java.io.*;
017 import org.springframework.util.StringUtils;
018 import gate.*;
019 import gate.util.*;
020 import gate.yam.parse.*;
021 import gate.yam.format.*;
022 import static gate.yam.translate.NodeKind.*;
023
024 /**
025 * This class is a pretty-printer for YAM documents. It takes a parse tree as
026 * input, munges the whitespace in the Token images, and prints it to the
027 * supplied writer.
028 * @author Hamish Cunningham
029 */
030 public class PrettyTranslator extends AbstractTranslator {
031
/** Creates a pretty-printer; no set-up beyond the superclass constructor. */
public PrettyTranslator() {
  super();
} // PrettyTranslator()
035
036 /** Get the path to the preamble resource. Unused. */
037 public String getPreamblePath() { return null; }
038
039 /** Array mapping node type name to start/end strings. Unused. */
040 public String[][] getConstantsTable() { return null; }
041
042 /** Array mapping predicate type name to attributes. Unused. */
043 public Object[][] getPredicatesTable() { return null; }
044
045 /**
 * Are we munging, or just recreating the original? Initialised to
 * <tt>false</tt>; nothing else in this class reads or writes the flag.
 * NOTE(review): presumably <tt>true</tt> would mean "reproduce the input
 * unchanged" - confirm against whatever code consumes it.
 */
046 boolean identityFunction = false;
047
048 /**
049 * A help class for storing annotation data before creating real
050 * annotations.
051 */
052 class AnnotationData {
053 AnnotationData(Long start, Long end, String type, FeatureMap features) {
054 this.start = start;
055 this.end = end;
056 this.type = type;
057 this.features = features;
058 } // AnnotationData(Long,Long,String,FeatureMap)
059
060 Long start;
061 Long end;
062 String type;
063 FeatureMap features;
064
065 public String toString() {
066 return start + " " + end + " " + type + " " + features;
067 }
068 } // class AnnotationData
069
070 /**
071 * Translation. Results are written to the Writer, which is also returned.
072 */
073 public Writer translate() throws GateException {
074 SimpleNode rootNode = parseTree.getRootNode();
075 SimpleNode titleNode = parseTree.getTitleNode();
076 if(titleNode != null)
077 log.debug("titleNode class:" + titleNode.getClass().getSimpleName());
078
079 StringBuilder content = new StringBuilder();
080 List<AnnotationData> annotations = new ArrayList<AnnotationData>();
081 String nodeImage = "";
082
083 // set the start/end offsets on the tokens
084 int offset = 0;
085 if(titleNode != null)
086 offset = getPlainNodeImage(titleNode, true, 0, annotations).length();
087 for(int i = 0, j = rootNode.jjtGetNumChildren(); i < j; i++) {
088 SimpleNode child = (SimpleNode) rootNode.jjtGetChild(i);
089 offset += getPlainNodeImage(child, true, offset, annotations).length();
090 }
091
092 // deal with the title node
093 if(titleNode != null) {
094 nodeImage = getPlainNodeImage(titleNode, false, -1);
095 content.append(nodeImage);
096 AnnotationData annData = new AnnotationData( // annotate the title
097 (long) 0, (long) content.length(), "Title", Factory.newFeatureMap()
098 );
099 annotations.add(annData);
100 }
101
102 // traverse the main parse tree, constructing an image of the document
103 // and collecting a list of annotations (offset/features)
104 for(int i = 0, j = rootNode.jjtGetNumChildren(); i < j; i++) {
105 boolean firstPass = true;
106 SimpleNode child = (SimpleNode) rootNode.jjtGetChild(i);
107 if(child.getClass() == ASTWord.class) // error nodes
108 content.append(getPlainNodeImage(child, false, -1));
109 traverse(child, content, annotations, firstPass, false /*nestedUnit*/);
110 }
111
112 // create a GATE doc and add annotation to it
113 Document doc = Factory.newDocument(content.toString());
114 AnnotationSet yamAnnots = doc.getAnnotations("YAM Constituents");
115 for(AnnotationData annData : annotations) {
116 try {
117 yamAnnots.add(
118 annData.start, annData.end, annData.type, annData.features
119 );
120 } catch(GateException e) {
121 System.err.println("oops! " + e + " " + annData);
122 }
123 }
124
125 // call YamFormatter to do the fixups
126 YamFormatter.normalise(doc, content);
127 // TODO remove debug code:
128 if(true) {
129 try {
130 File outFile = new File(
131 System.getProperty("user.dir"), "pretty-" +
132 doc.getName().replaceAll(" ", "-") + ".xml"
133 );
134 Writer docWriter = new PrintWriter(outFile);
135 docWriter.write(doc.toXml());
136 docWriter.flush();
137 } catch(Exception e) {
138 log.error("oops: " + e);
139 }
140 }
141
142 // print the fixed-up doc content on the writer and free the resource
143 pr(content.toString());
144 //TODO
145 if(! pleaseLeakLotsOfMemory)
146 Factory.deleteResource(doc);
147
148 return writer;
149 } // translate()
150
151 /**
152 * For debugging, set this true and the GATE docs that the translator
153 * creates will not be deleted. Every call to translate() then leaves its
154 * document behind, so memory use grows with each translation: never enable
155 * this in a long-running deployment such as a web container.
156 */
157 public static boolean pleaseLeakLotsOfMemory = false;
158
159 /** Collect annotation data and content dominated by the input node. */
160 void traverse(
161 SimpleNode n, StringBuilder content, List<AnnotationData> annotations,
162 boolean firstPass, boolean nestedUnit
163 ) {
164
165 long startingPoint = (long) content.length(); // start offset
166 String nodeImage = getPlainNodeImage(n, false, -1); // current node text
167 int nodeImageLen = nodeImage.length(); // ...and length
168 Class nClass = n.getClass(); // current node class
169 int trailingSpaceLen = 0; // how much trailing space
170
171 if(nClass == ASTUnit.class) { // units to finish before trailing space
172 trailingSpaceLen =
173 nodeImageLen - StringUtils.trimTrailingWhitespace(nodeImage).length();
174 } else if(firstPass && nClass == ASTSep.class) { // seps after title
175 content.append(nodeImage);
176 }
177
178 // record the data for annotating constituents
179 Token firstTok = n.getFirstToken();
180 Token lastTok = n.getLastToken();
181 FeatureMap features = Factory.newFeatureMap();
182 AnnotationData annData = new AnnotationData(
183 /* start */ (long) firstTok.startOffset,
184 /* end */ (long) lastTok.endOffset - trailingSpaceLen,
185 /* type */ nClass.getSimpleName().substring(3),
186 /* features */ features
187 );
188 annotations.add(annData);
189
190 // annotate the nested nodes (if any)
191 for(int i = 0, j = n.jjtGetNumChildren(); i < j; i++) {
192 SimpleNode child = (SimpleNode) n.jjtGetChild(i);
193 Token t = child.getFirstToken();
194
195 // don't recurse on includes
196 if(t != null && (! t.image.trim().startsWith("%include("))) {
197 boolean childIsUnit = ( child.getClass() == ASTUnit.class );
198 traverse(child, content, annotations, false, childIsUnit);
199 }
200 }
201
202 // append to the content for the Unit images
203 if(nClass == ASTUnit.class && ! nestedUnit) {
204 content.append(nodeImage);
205 if(n.jjtGetNumChildren() > 0) // add type feature to the unit annots
206 features.put(
207 "type",
208 ((SimpleNode) n.jjtGetChild(0))
209 .getClass().getSimpleName().substring(3)
210 );
211 }
212 } // traverse(SimpleNode, StringBuilder, List<>, boolean, boolean)
213
214 /**
215 * Get a plain node's image from its tokens. See {@link
216 * #getPlainNodeImage(gate.yam.parse.SimpleNode, boolean, int, java.util.List)}
217 */
218 String getPlainNodeImage(SimpleNode n, boolean setOffsets, int offset) {
219 return getPlainNodeImage(n, setOffsets, offset, null);
220 } // getPlainNodeImage(SimpleNode,boolean,int)
221
222 /**
223 * Get a plain node's image from its tokens. Also functions to set the
224 * (absolute) offsets on tokens when <tt>offset</tt> is set true, and when
225 * run a single pass across contiguous nodes.
226 * @param n the node to take an image of
227 * @param setOffsets whether to set start/end offsets on the tokens or not
228 * @param offset the absolute offset we're starting from
229 * @param annotations if non-null comment annotations will be accumulated
230 * here
231 * @return the image, and where appropriate new offsets on the token stream
232 * and/or comment annotations
233 */
234 String getPlainNodeImage(
235 SimpleNode n, boolean setOffsets, int offset,
236 List<AnnotationData> annotations)
237 {
238
239 StringBuilder s = new StringBuilder();
240 if(n == null) return "";
241 Token first = n.getFirstToken();
242 if(first == null) return "";
243 Token last = n.getLastToken();
244 if(last == null) last = first;
245
246 while(first != last && first != null) {
247 offset = processToken(first, offset, s, setOffsets, annotations);
248 first = first.next;
249 }
250 if( last != null ) {
251 offset = processToken(last, offset, s, setOffsets, annotations);
252 }
253
254 return s.toString();
255 } // getPlainNodeImage(SimpleNode,boolean,int,List<AnnotationData>)
256
257 int processToken(
258 Token t, int offset, StringBuilder s, boolean setOffsets,
259 List<AnnotationData> annotations
260 ) {
261 String commentImage = getCommentImage(t);
262 if(setOffsets)
263 t.startOffset = offset;
264 s.append(commentImage);
265 if(annotations != null && commentImage.length() > 0) {
266 AnnotationData annData = new AnnotationData(
267 (long) offset, (long) offset + commentImage.length(),
268 "Comment", Factory.newFeatureMap()
269 );
270 annotations.add(annData);
271 }
272 s.append(t.image);
273 if(setOffsets) {
274 offset += commentImage.length() + t.image.length();
275 t.endOffset = offset;
276 }
277
278 return offset;
279 } // processToken(Token, int, StringBuilder, boolean, List<AnnotationData>)
280
281 /** Get the image of a comment. */
282 String getCommentImage(Token t) {
283 StringBuilder commentImage = new StringBuilder();
284 Token special = null;
285
286 if(t != null)
287 special = t.specialToken;
288 while(special != null) {
289 commentImage.insert(0, special.image);
290 special = special.specialToken;
291 }
292
293 return commentImage.toString();
294 } // getCommentImage(Token)
295
296 /** Process URLs and Anchors. */
297 public void processURLs(SimpleNode node) throws GateException {
298 // intentionally empty
299 } // processURLs(SimpleNode)
300
301 } // PrettyTranslator
|