0001 /*
0002 JSPWikiMarkupParser.java
0003
0004 This is a severely hacked-up version of the original class from
0005 <a href="http://jspwiki.org/">JSPWiki</a>. It is used to perform a simple
0006 conversion to HTML for content using JSPWiki format.
0007
0008 JSPWiki - a JSP-based WikiWiki clone.
0009
0010 Copyright (C) 2001-2005 Janne Jalkanen (Janne.Jalkanen@iki.fi)
0011
0012 This program is free software; you can redistribute it and/or modify
0013 it under the terms of the GNU Lesser General Public License as published by
0014 the Free Software Foundation; either version 2.1 of the License, or
0015 (at your option) any later version.
0016
0017 This program is distributed in the hope that it will be useful,
0018 but WITHOUT ANY WARRANTY; without even the implied warranty of
0019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0020 GNU Lesser General Public License for more details.
0021
0022 You should have received a copy of the GNU Lesser General Public License
0023 along with this program; if not, write to the Free Software
0024 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0025 */
0026
0027 package gate.yam.convert;
0028
0029 import java.io.*;
0030 import java.util.*;
0031
0032 import javax.xml.transform.Result;
0033
0034 import org.apache.commons.lang.StringEscapeUtils;
0035 import org.apache.commons.lang.StringUtils;
0036 import org.apache.log4j.Logger;
0037 import org.apache.oro.text.GlobCompiler;
0038 import org.apache.oro.text.regex.*;
0039 import org.jdom.*;
0040 import org.jdom.xpath.XPath;
0041
0042 /**
0043 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the
0044 * heart and soul of JSPWiki : make sure you test properly anything that is
0045 * added, or else it breaks down horribly.
0046 *
0047 * @author Janne Jalkanen
0048 * @since 2.4
0049 */
0050 public class JSPWikiMarkupParser{
0051 ///////////////////////////MARKUP PARSER
/** Allow this many characters to be pushed back in the stream. In effect,
    this limits the size of a single line. */
protected static final int PUSHBACK_BUFFER_SIZE = 10*1024;
/** Current input, wrapped by setInputReader() so characters can be unread;
    stays null until a non-null reader has been supplied. */
protected PushbackReader m_in;
/** Incremented by nextToken() and decremented by pushBack(); starts at -1. */
private int m_pos = -1; // current position in reader stream
0057
0058 //protected WikiEngine m_engine;
0059 //protected WikiContext m_context;
0060
/** Optionally stores internal wikilinks */
// NOTE(review): the methods that would add hooks to these five lists are
// commented out in this class, so they appear to stay empty - confirm
// against the rest of the file.
protected ArrayList m_localLinkMutatorChain = new ArrayList();
protected ArrayList m_externalLinkMutatorChain = new ArrayList();
protected ArrayList m_attachmentLinkMutatorChain = new ArrayList();
protected ArrayList m_headingListenerChain = new ArrayList();
protected ArrayList m_linkMutators = new ArrayList();

/** When false, isImageLink() never reports a link as an inline image.
    Toggled through enableImageInlining(). */
protected boolean m_inlineImages = true;

/** Cleared by disableAccessRules(), which initialize() always calls. */
protected boolean m_parseAccessRules = true;
/** If set to "true", allows using raw HTML within Wiki text. Be warned,
    this is a VERY dangerous option to set - never turn this on in a publicly
    allowable Wiki, unless you are absolutely certain of what you're doing. */
public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML";
/** If set to "true", enables plugins during parsing */
public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins";

/** Lists all punctuation characters allowed in WikiMarkup. These
    will not be cleaned away. */

protected static final String PUNCTUATION_CHARS_ALLOWED = "._";
0082
0083 /**
0084 * Replaces the current input character stream with a new one.
0085 * @param in New source for input. If null, this method does nothing.
0086 * @return the old stream
0087 */
0088 public Reader setInputReader( Reader in )
0089 {
0090 Reader old = m_in;
0091
0092 if( in != null )
0093 {
0094 m_in = new PushbackReader( new BufferedReader( in ),
0095 PUSHBACK_BUFFER_SIZE );
0096 }
0097
0098 return old;
0099 }
0100
0101 ///**
0102 //* Adds a hook for processing link texts. This hook is called
0103 //* when the link text is written into the output stream, and
0104 //* you may use it to modify the text. It does not affect the
0105 //* actual link, only the user-visible text.
0106 //*
0107 //* @param mutator The hook to call. Null is safe.
0108 //*/
0109 //public void addLinkTransmutator( StringTransmutator mutator )
0110 //{
0111 // if( mutator != null )
0112 // {
0113 // m_linkMutators.add( mutator );
0114 // }
0115 //}
0116
0117 ///**
0118 //* Adds a hook for processing local links. The engine
0119 //* transforms both non-existing and existing page links.
0120 //*
0121 //* @param mutator The hook to call. Null is safe.
0122 //*/
0123 //public void addLocalLinkHook( StringTransmutator mutator )
0124 //{
0125 // if( mutator != null )
0126 // {
0127 // m_localLinkMutatorChain.add( mutator );
0128 // }
0129 //}
0130 //
0131 ///**
0132 //* Adds a hook for processing external links. This includes
0133 //* all http:// ftp://, etc. links, including inlined images.
0134 //*
0135 //* @param mutator The hook to call. Null is safe.
0136 //*/
0137 //public void addExternalLinkHook( StringTransmutator mutator )
0138 //{
0139 // if( mutator != null )
0140 // {
0141 // m_externalLinkMutatorChain.add( mutator );
0142 // }
0143 //}
0144 //
0145 ///**
0146 //* Adds a hook for processing attachment links.
0147 //*
0148 //* @param mutator The hook to call. Null is safe.
0149 //*/
0150 //public void addAttachmentLinkHook( StringTransmutator mutator )
0151 //{
0152 // if( mutator != null )
0153 // {
0154 // m_attachmentLinkMutatorChain.add( mutator );
0155 // }
0156 //}
0157
0158 //public void addHeadingListener( HeadingListener listener )
0159 //{
0160 // if( listener != null )
0161 // {
0162 // m_headingListenerChain.add( listener );
0163 // }
0164 //}
0165 //
0166 public void disableAccessRules()
0167 {
0168 m_parseAccessRules = false;
0169 }
0170
0171 /**
0172 * Use this to turn on or off image inlining.
0173 * @param toggle If true, images are inlined (as per set in jspwiki.properties)
0174 * If false, then images won't be inlined; instead, they will be
0175 * treated as standard hyperlinks.
0176 * @since 2.2.9
0177 */
0178 public void enableImageInlining( boolean toggle )
0179 {
0180 m_inlineImages = toggle;
0181 }
0182
0183
0184 /**
0185 * Return the current position in the reader stream.
0186 * The value will be -1 prior to reading.
0187 * @return the reader position as an int.
0188 */
0189 public int getPosition()
0190 {
0191 return m_pos;
0192 }
0193
0194 protected int nextToken()
0195 throws IOException
0196 {
0197 if( m_in == null ) return -1;
0198 m_pos++;
0199 return m_in.read();
0200 }
0201
0202 /**
0203 * Push back any character to the current input. Does not
0204 * push back a read EOF, though.
0205 */
0206 protected void pushBack( int c )
0207 throws IOException
0208 {
0209 if( c != -1 && m_in != null )
0210 {
0211 m_pos--;
0212 m_in.unread( c );
0213 }
0214 }
0215
0216 /**
0217 * Cleans a Wiki name.
0218 * <P>
0219 * [ This is a link ] -> ThisIsALink
0220 *
0221 * @param link Link to be cleared. Null is safe, and causes this to return null.
0222 * @return A cleaned link.
0223 *
0224 * @since 2.0
0225 */
0226 public static String cleanLink( String link )
0227 {
0228 if( link == null ) return null;
0229
0230 StringBuffer clean = new StringBuffer(link.length());
0231
0232 //
0233 // Remove non-alphanumeric characters that should not
0234 // be put inside WikiNames. Note that all valid
0235 // Unicode letters are considered okay for WikiNames.
0236 // It is the problem of the WikiPageProvider to take
0237 // care of actually storing that information.
0238 //
0239 // Also capitalize things, if necessary.
0240 //
0241
0242 boolean isWord = true; // If true, we've just crossed a word boundary
0243
0244 for( int i = 0; i < link.length(); i++ )
0245 {
0246 char ch = link.charAt(i);
0247
0248 if( Character.isLetterOrDigit( ch ) || PUNCTUATION_CHARS_ALLOWED.indexOf(ch) != -1 )
0249 {
0250 // Is a letter
0251
0252 if( isWord ) ch = Character.toUpperCase( ch );
0253 clean.append( ch );
0254 isWord = false;
0255 }
0256 else
0257 {
0258 isWord = true;
0259 }
0260 }
0261
0262 return clean.toString();
0263 }
0264
0265
0266
0267 //////////////////////////END MP ///////////////////
0268
0269
0270
0271
0272
0273
0274
/** Name of the outlink image; relative path to the JSPWiki directory. */
// NOTE(review): not referenced in the part of the class reviewed here -
// confirm whether it is still used further down.
private static final String OUTLINK_IMAGE = "images/out.png";

/**
 * The value for anchor element <tt>class</tt> attributes when used for wiki
 * page (normal) links. The value is "wikipage".
 */
public static final String CLASS_WIKIPAGE = "wikipage";

/**
 * The value for anchor element <tt>class</tt> attributes when used for edit
 * page links. The value is "editpage".
 */
public static final String CLASS_EDITPAGE = "editpage";

/**
 * The value for anchor element <tt>class</tt> attributes when used for
 * interwiki page links. The value is "interwiki".
 */
public static final String CLASS_INTERWIKI = "interwiki";
0295
// Link type codes. makeLink() switches on these to decide which element
// to build for a link.

/** Link to a wiki page for viewing ("wikipage" class anchor). */
private static final int READ = 0;

/** Link to create/edit a page ("editpage" class anchor). */
private static final int EDIT = 1;

private static final int EMPTY = 2; // Empty message

/** Footnote target: an anchor carrying a "ref-..." name. */
private static final int LOCAL = 3;

/** Reference to a footnote: an anchor whose href is "#ref-...". */
private static final int LOCALREF = 4;

/** Inlined image. */
private static final int IMAGE = 5;

/** Off-site link ("external" class anchor). */
private static final int EXTERNAL = 6;

/** Link into another wiki ("interwiki" class anchor). */
private static final int INTERWIKI = 7;

/** Inlined image wrapped in a plain anchor. */
private static final int IMAGELINK = 8;

/** Inlined image wrapped in a "wikipage" class anchor. */
private static final int IMAGEWIKILINK = 9;

/** Attachment link, optionally followed by an info icon link. */
private static final int ATTACHMENT = 10;

// private static final int ATTACHMENTIMAGE = 11;
private static Logger log = Logger.getLogger(JSPWikiMarkupParser.class);
0320
// Parser state flags. Apart from m_isEscaping, none of them is read or
// written in the part of the class reviewed here.

// private boolean m_iscode = false;
private boolean m_isbold = false;

private boolean m_isitalic = false;

private boolean m_istable = false;

private boolean m_isPre = false;

/** While set, flushPlainText() skips the CamelCase/URI link scan. */
private boolean m_isEscaping = false;

private boolean m_isdefinition = false;

private boolean m_isPreBlock = false;

/** Contains style information, in multiple forms. */
private Stack m_styleStack = new Stack();

// general list handling
private int m_genlistlevel = 0;

private StringBuffer m_genlistBulletBuffer = new StringBuffer(10); // stores
// the #
// and *
// pattern

private boolean m_allowPHPWikiStyleLists = true;

private boolean m_isOpenParagraph = false;

/** Keeps image regexp Patterns */
// Filled by initialize() with the glob patterns from getImagePatterns().
private ArrayList m_inlineImagePatterns;

/** Matcher used by isImageLink() against m_inlineImagePatterns. */
private PatternMatcher m_inlineMatcher = new Perl5Matcher();

/** Keeps track of any plain text that gets put in the Text nodes */
// Emptied and replaced by flushPlainText().
private StringBuffer m_plainTextBuf = new StringBuffer(20);

/** Element that new content is appended to; moved by pushElement() and
    popElement(). */
private Element m_currentElement;
0360
/**
 * This property defines the inline image pattern. Its current value is
 * jspwiki.translatorReader.inlinePattern
 */
public static final String PROP_INLINEIMAGEPTRN = "jspwiki.translatorReader.inlinePattern";

/** If true, consider CamelCase hyperlinks as well. */
public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";

/**
 * If true, all hyperlinks are translated as well, regardless of whether
 * they are surrounded by brackets.
 */
public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";

/**
 * If true, all outward links (external links) have a small link image
 * appended.
 */
public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";

/**
 * If true, all outward attachment info links have a small link image
 * appended.
 */
public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";

/** If set to "true", all external links are tagged with 'rel="nofollow"' */
public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";
0390
/** If true, then considers CamelCase links as well. */
private boolean m_camelCaseLinks = false;

/** If true, consider URIs that have no brackets as well. */
// FIXME: Currently reserved, but not used.
private boolean m_plainUris = false;

/** If true, all outward links use a small link image. */
private boolean m_useOutlinkImage = true;

/** If true, makeLink() follows an attachment link with an info icon link. */
private boolean m_useAttachmentImage = true;

/** If true, allows raw HTML. */
// When false, flushPlainText() runs text through escapeHTMLEntities().
private boolean m_allowHTML = false;

/** If true, makeLink() adds rel="nofollow" to external links. */
private boolean m_useRelNofollow = false;

/** Compiler used by initialize() to build m_camelCasePattern. */
private PatternCompiler m_compiler = new Perl5Compiler();

/**
 * Pattern for bare WikiWords and URIs in plain text. Group 1 is the
 * prefix (start of input or a run of non-alphanumerics), group 2 the whole
 * WikiWord or URI, group 3 the protocol (http://, https:// or mailto:) and
 * group 4 the rest of the URI. Groups 3 and 4 are only set for URIs.
 */
static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;]+))";

/** Matcher used by flushPlainText() to find WikiWords and URIs. */
private PatternMatcher m_camelCaseMatcher = new Perl5Matcher();

/** WIKIWORD_REGEX compiled by initialize(). */
private Pattern m_camelCasePattern;

private int m_rowNum = 1;

/**
 * The default inlining pattern. Currently "*.png"
 */
public static final String DEFAULT_INLINEPATTERN = "*.png";
0422
/**
 * This list contains all IANA registered URI protocol types as of September
 * 2004 + a few well-known extra types.
 *
 * JSPWiki recognises all of them as external links.
 *
 * This array is sorted during class load, so you can just dump here whatever
 * you want in whatever order you want.
 */
// NOTE(review): "z39.50s", "z39.50r", "soap.beeps", "xmlrpc.beep",
// "xmlrpc.beeps" and "mtqp" have no trailing colon, unlike the other
// entries. isExternalLink() confirms a hit with startsWith(), so these
// would also accept e.g. "mtqpfoo" - confirm whether that is intended.
static final String[] c_externalLinks = {"http:", "ftp:", "https:",
    "mailto:", "news:", "file:", "rtsp:", "mms:", "ldap:", "gopher:",
    "nntp:", "telnet:", "wais:", "prospero:", "z39.50s", "z39.50r", "vemmi:",
    "imap:", "nfs:", "acap:", "tip:", "pop:", "dav:", "opaquelocktoken:",
    "sip:", "sips:", "tel:", "fax:", "modem:", "soap.beep:", "soap.beeps",
    "xmlrpc.beep", "xmlrpc.beeps", "urn:", "go:", "h323:", "ipp:", "tftp:",
    "mupdate:", "pres:", "im:", "mtqp", "smb:"};

/**
 * This Comparator is used to find an external link from c_externalLinks. It
 * checks if the link starts with the other array entry.
 */
private static Comparator c_startingComparator = new StartingComparator();
// Sort once at class load so that isExternalLink() can binary-search.
static {
    Arrays.sort(c_externalLinks);
}
0448
/**
 * Creates a markup parser that reads from the given stream.
 *
 * @param in source of wiki markup; null leaves the parser without input
 *           until setInputReader() is called
 */
public JSPWikiMarkupParser(Reader in) {
    this.setInputReader( in );
    this.initialize();
}
0456
0457 /**
0458 * @param engine
0459 * The WikiEngine this reader is attached to. Is used to figure out
0460 * of a page exits.
0461 */
0462 // FIXME: parsers should be pooled for better performance.
0463 private void initialize() {
0464 PatternCompiler compiler = new GlobCompiler();
0465 ArrayList compiledpatterns = new ArrayList();
0466 Collection ptrns = getImagePatterns(/* m_engine */);
0467 //
0468 // Make them into Regexp Patterns. Unknown patterns
0469 // are ignored.
0470 //
0471 for(Iterator i = ptrns.iterator(); i.hasNext();) {
0472 try {
0473 compiledpatterns.add(compiler.compile((String)i.next()));
0474 } catch(MalformedPatternException e) {
0475 log.error("Malformed pattern in properties: ", e);
0476 }
0477 }
0478 m_inlineImagePatterns = compiledpatterns;
0479 try {
0480 m_camelCasePattern = m_compiler.compile(WIKIWORD_REGEX);
0481 } catch(MalformedPatternException e) {
0482 log.fatal("Internal error: Someone put in a faulty pattern.", e);
0483 throw new RuntimeException("Faulty camelcasepattern in TranslatorReader");
0484 }
0485 //
0486 // Set the properties.
0487 //
0488 Properties props = new Properties();
0489 m_camelCaseLinks = getBooleanProperty(props, PROP_CAMELCASELINKS,
0490 m_camelCaseLinks);
0491 m_plainUris = getBooleanProperty(props, PROP_PLAINURIS,
0492 m_plainUris);
0493 m_useOutlinkImage = getBooleanProperty(props,
0494 PROP_USEOUTLINKIMAGE, m_useOutlinkImage);
0495 m_useAttachmentImage = getBooleanProperty(props,
0496 PROP_USEATTACHMENTIMAGE, m_useAttachmentImage);
0497 m_allowHTML = getBooleanProperty(props,
0498 PROP_ALLOWHTML, m_allowHTML);
0499 m_useRelNofollow = getBooleanProperty(props, PROP_USERELNOFOLLOW,
0500 m_useRelNofollow);
0501 // if( m_engine.getUserManager().getUserDatabase() == null ||
0502 // m_engine.getAuthorizationManager() == null )
0503 // {
0504 disableAccessRules();
0505 // }
0506 // m_context.getPage().setHasMetadata();
0507 }
0508
0509 /**
0510 * Figure out which image suffixes should be inlined.
0511 *
0512 * @return Collection of Strings with patterns.
0513 */
0514 // FIXME: Does not belong here; should be elsewhere
0515 public static Collection getImagePatterns( /* WikiEngine engine */) {
0516 // Properties props = engine.getWikiProperties();
0517 ArrayList ptrnlist = new ArrayList();
0518 ptrnlist.add(DEFAULT_INLINEPATTERN);
0519 return ptrnlist;
0520 }
0521
/**
 * Returns the given page name unchanged. No existence check is made, so
 * the result is null only when the argument is null.
 */
private String linkExists(String page) {
    return page;
}
0528
0529 private Element makeLink(int type, String link, String text, String section) {
0530 Element el = null;
0531 if(text == null) text = link;
0532 // text = callMutatorChain( m_linkMutators, text );
0533 section = (section != null) ? ("#" + section) : "";
0534 // Make sure we make a link name that can be accepted
0535 // as a valid URL.
0536 if(link.length() == 0) {
0537 type = EMPTY;
0538 }
0539 switch(type){
0540 case READ:
0541 el = new Element("a").setAttribute("class", CLASS_WIKIPAGE);
0542 el.setAttribute("href", "VIEW" + link + section);
0543 el.addContent(text);
0544 break;
0545 case EDIT:
0546 el = new Element("a").setAttribute("class", CLASS_EDITPAGE);
0547 el.setAttribute("title", "Create '" + link + "'");
0548 el.setAttribute("href", "EDIT" + link);
0549 el.addContent(text);
0550 break;
0551 case EMPTY:
0552 el = new Element("u").addContent(text);
0553 break;
0554 //
0555 // These two are for local references - footnotes and
0556 // references to footnotes.
0557 // We embed the page name (or whatever WikiContext gives us)
0558 // to make sure the links are unique across Wiki.
0559 //
0560 case LOCALREF:
0561 el = new Element("a").setAttribute("class", "footnoteref");
0562 el.setAttribute("href", "#ref-" + link);
0563 el.addContent("[" + text + "]");
0564 break;
0565 case LOCAL:
0566 el = new Element("a").setAttribute("class", "footnote");
0567 el.setAttribute("name", "ref-" + link.substring(1));
0568 el.addContent("[" + text + "]");
0569 break;
0570 //
0571 // With the image, external and interwiki types we need to
0572 // make sure nobody can put in Javascript or something else
0573 // annoying into the links themselves. We do this by preventing
0574 // a haxor from stopping the link name short with quotes in
0575 // fillBuffer().
0576 //
0577 case IMAGE:
0578 el = new Element("img").setAttribute("class", "inline");
0579 el.setAttribute("src", link);
0580 el.setAttribute("alt", text);
0581 break;
0582 case IMAGELINK:
0583 el = new Element("img").setAttribute("class", "inline");
0584 el.setAttribute("src", link);
0585 el.setAttribute("alt", text);
0586 el = new Element("a").setAttribute("href", text).addContent(el);
0587 break;
0588 case IMAGEWIKILINK:
0589 String pagelink = text;
0590 el = new Element("img").setAttribute("class", "inline");
0591 el.setAttribute("src", link);
0592 el.setAttribute("alt", text);
0593 el = new Element("a").setAttribute("class", CLASS_WIKIPAGE)
0594 .setAttribute("href", pagelink).addContent(el);
0595 break;
0596 case EXTERNAL:
0597 el = new Element("a").setAttribute("class", "external");
0598 if(m_useRelNofollow) el.setAttribute("rel", "nofollow");
0599 el.setAttribute("href", link + section);
0600 el.addContent(text);
0601 break;
0602 case INTERWIKI:
0603 el = new Element("a").setAttribute("class", CLASS_INTERWIKI);
0604 el.setAttribute("href", link + section);
0605 el.addContent(text);
0606 break;
0607 case ATTACHMENT:
0608 String attlink = "ATTACH" + link;
0609 String infolink = "INFO" + link;
0610 String imglink = "NONE" + "images/attachment_small.png";
0611 el = new Element("a").setAttribute("class", "attachment");
0612 el.setAttribute("href", attlink);
0613 el.addContent(text);
0614 pushElement(el);
0615 popElement(el.getName());
0616 if(m_useAttachmentImage) {
0617 el = new Element("img").setAttribute("src", imglink);
0618 el.setAttribute("border", "0");
0619 el.setAttribute("alt", "(info)");
0620 el = new Element("a").setAttribute("href", infolink).addContent(el);
0621 } else {
0622 el = null;
0623 }
0624 break;
0625 default:
0626 break;
0627 }
0628 if(el != null) {
0629 flushPlainText();
0630 m_currentElement.addContent(el);
0631 }
0632 return el;
0633 }
0634
0635 /**
0636 * Figures out if a link is an off-site link. This recognizes the most common
0637 * protocols by checking how it starts.
0638 *
0639 * @since 2.4
0640 */
0641 public static boolean isExternalLink(String link) {
0642 int idx = Arrays.binarySearch(c_externalLinks, link, c_startingComparator);
0643 //
0644 // We need to check here once again; otherwise we might
0645 // get a match for something like "h".
0646 //
0647 if(idx >= 0 && link.startsWith(c_externalLinks[idx])) return true;
0648 return false;
0649 }
0650
0651
0652 /**
0653 * Gets a boolean property from a standard Properties list.
0654 * Returns the default value, in case the key has not been set.
0655 * <P>
0656 * The possible values for the property are "true"/"false", "yes"/"no", or
0657 * "on"/"off". Any value not recognized is always defined as "false".
0658 *
0659 * @param props A list of properties to search.
0660 * @param key The property key.
0661 * @param defval The default value to return.
0662 *
0663 * @return True, if the property "key" was set to "true", "on", or "yes".
0664 *
0665 * @since 2.0.11
0666 */
0667 public static boolean getBooleanProperty( Properties props,
0668 String key,
0669 boolean defval )
0670 {
0671 String val = props.getProperty( key );
0672
0673 if( val == null ) return defval;
0674
0675 return isPositive( val );
0676 }
0677
0678 /**
0679 * Returns true, if the string "val" denotes a positive string. Allowed
0680 * values are "yes", "on", and "true". Comparison is case-insignificant.
0681 * Null values are safe.
0682 *
0683 * @param val Value to check.
0684 * @return True, if val is "true", "on", or "yes"; otherwise false.
0685 *
0686 * @since 2.0.26
0687 */
0688 public static boolean isPositive( String val )
0689 {
0690 if( val == null ) return false;
0691
0692 val = val.trim();
0693
0694 return ( val.equalsIgnoreCase("true") || val.equalsIgnoreCase("on") ||
0695 val.equalsIgnoreCase("yes") );
0696 }
0697
0698
0699 /**
0700 * Returns true, if the argument contains a number, otherwise false.
0701 * In a quick test this is roughly the same speed as Integer.parseInt()
0702 * if the argument is a number, and roughly ten times the speed, if
0703 * the argument is NOT a number.
0704 *
0705 * @since 2.4
0706 */
0707
0708 public static boolean isNumber( String s )
0709 {
0710 if( s == null ) return false;
0711
0712 if( s.length() > 1 && s.charAt(0) == '-' )
0713 s = s.substring(1);
0714
0715 for( int i = 0; i < s.length(); i++ )
0716 {
0717 if( !Character.isDigit(s.charAt(i)) )
0718 return false;
0719 }
0720
0721 return true;
0722 }
0723 /**
0724 * Returns true, if the link in question is an access rule.
0725 */
0726 private static boolean isAccessRule(String link) {
0727 return link.startsWith("{ALLOW") || link.startsWith("{DENY");
0728 }
0729
0730 /**
0731 * Matches the given link to the list of image name patterns to determine
0732 * whether it should be treated as an inline image or not.
0733 */
0734 private boolean isImageLink(String link) {
0735 if(m_inlineImages) {
0736 for(Iterator i = m_inlineImagePatterns.iterator(); i.hasNext();) {
0737 if(m_inlineMatcher.matches(link, (Pattern)i.next())) return true;
0738 }
0739 }
0740 return false;
0741 }
0742
0743 private static boolean isMetadata(String link) {
0744 return link.startsWith("{SET");
0745 }
0746
0747 /**
0748 * This method peeks ahead in the stream until EOL and returns the result. It
0749 * will keep the buffers untouched.
0750 *
0751 * @return The string from the current position to the end of line.
0752 */
0753 // FIXME: Always returns an empty line, even if the stream is full.
0754 private String peekAheadLine() throws IOException {
0755 String s = readUntilEOL().toString();
0756 if(s.length() > PUSHBACK_BUFFER_SIZE) {
0757 log
0758 .warn("Line is longer than maximum allowed size ("
0759 + PUSHBACK_BUFFER_SIZE
0760 + " characters. Attempting to recover...");
0761 pushBack(s.substring(0, PUSHBACK_BUFFER_SIZE - 1));
0762 } else {
0763 try {
0764 pushBack(s);
0765 } catch(IOException e) {
0766 log
0767 .warn("Pushback failed: the line is probably too long. Attempting to recover.");
0768 }
0769 }
0770 return s;
0771 }
0772
0773 /**
0774 * Writes HTML for error message.
0775 */
0776 public static Element makeError(String error) {
0777 return new Element("span").setAttribute("class", "error").addContent(error);
0778 }
0779
0780 private int flushPlainText() {
0781 int numChars = m_plainTextBuf.length();
0782 if(numChars > 0) {
0783 String buf;
0784 if(!m_allowHTML) {
0785 buf = escapeHTMLEntities(m_plainTextBuf);
0786 } else {
0787 buf = m_plainTextBuf.toString();
0788 }
0789 //
0790 // We must first empty the buffer because the side effect of
0791 // calling makeCamelCaseLink() is to call this routine.
0792 //
0793 m_plainTextBuf = new StringBuffer(20);
0794 try {
0795 //
0796 // This is the heaviest part of parsing, and therefore we can
0797 // do some optimization here.
0798 //
0799 // 1) Only when the length of the buffer is big enough, we try to do the
0800 // match
0801 //
0802 if(m_camelCaseLinks && !m_isEscaping && buf.length() > 3) {
0803 // System.out.println("Buffer="+buf);
0804 while(m_camelCaseMatcher.contains(buf, m_camelCasePattern)) {
0805 MatchResult result = m_camelCaseMatcher.getMatch();
0806 String firstPart = buf.substring(0, result.beginOffset(0));
0807 String prefix = result.group(1);
0808 if(prefix == null) prefix = "";
0809 String camelCase = result.group(2);
0810 String protocol = result.group(3);
0811 String uri = protocol + result.group(4);
0812 buf = buf.substring(result.endOffset(0));
0813 m_currentElement.addContent(firstPart);
0814 //
0815 // Check if the user does not wish to do URL or WikiWord expansion
0816 //
0817 if(prefix.endsWith("~") || prefix.indexOf('[') != -1) {
0818 if(prefix.endsWith("~"))
0819 prefix = prefix.substring(0, prefix.length() - 1);
0820 if(camelCase != null) {
0821 m_currentElement.addContent(prefix + camelCase);
0822 } else if(protocol != null) {
0823 m_currentElement.addContent(prefix + uri);
0824 }
0825 continue;
0826 }
0827 //
0828 // Fine, then let's check what kind of a link this was
0829 // and emit the proper elements
0830 //
0831 if(protocol != null) {
0832 char c = uri.charAt(uri.length() - 1);
0833 if(c == '.' || c == ',') {
0834 uri = uri.substring(0, uri.length() - 1);
0835 buf = c + buf;
0836 }
0837 // System.out.println("URI match "+uri);
0838 m_currentElement.addContent(prefix);
0839 makeDirectURILink(uri);
0840 } else {
0841 // System.out.println("Matched: '"+camelCase+"'");
0842 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
0843 // System.out.println("prefix="+prefix);
0844 m_currentElement.addContent(prefix);
0845 makeCamelCaseLink(camelCase);
0846 }
0847 }
0848 m_currentElement.addContent(buf);
0849 } else {
0850 //
0851 // No camelcase asked for, just add the elements
0852 //
0853 m_currentElement.addContent(buf);
0854 }
0855 } catch(IllegalDataException e) {
0856 //
0857 // Sometimes it's possible that illegal XML chars is added to the data.
0858 // Here we make sure it does not stop parsing.
0859 //
0860 m_currentElement.addContent(makeError(e.getMessage()));
0861 }
0862 }
0863 return numChars;
0864 }
0865
0866 /**
0867 * Escapes XML entities in a HTML-compatible way (i.e. does not escape
0868 * entities that are already escaped).
0869 *
0870 * @param buf
0871 * @return
0872 */
0873 private String escapeHTMLEntities(StringBuffer buf) {
0874 StringBuffer tmpBuf = new StringBuffer(buf.length() + 20);
0875 for(int i = 0; i < buf.length(); i++) {
0876 char ch = buf.charAt(i);
0877 if(ch == '<') {
0878 tmpBuf.append("<");
0879 } else if(ch == '>') {
0880 tmpBuf.append(">");
0881 } else if(ch == '&') {
0882 for(int j = (i < buf.length() - 1) ? i + 1 : i; j < buf.length(); j++) {
0883 int ch2 = buf.charAt(j);
0884 if(ch2 == ';') {
0885 tmpBuf.append(ch);
0886 break;
0887 }
0888 if(ch2 != '#' && !Character.isLetterOrDigit((char)ch2)) {
0889 tmpBuf.append("&");
0890 break;
0891 }
0892 }
0893 } else {
0894 tmpBuf.append(ch);
0895 }
0896 }
0897 return tmpBuf.toString();
0898 }
0899
0900 private Element pushElement(Element e) {
0901 flushPlainText();
0902 m_currentElement.addContent(e);
0903 m_currentElement = e;
0904 return e;
0905 }
0906
0907 private Element addElement(Content e) {
0908 if(e != null) {
0909 flushPlainText();
0910 m_currentElement.addContent(e);
0911 }
0912 return m_currentElement;
0913 }
0914
/**
 * All elements that can be empty by the HTML DTD. popElement() leaves these
 * alone when they have no content; any other empty element gets an empty
 * text node added.
 */
// Keep sorted: popElement() looks names up with Arrays.binarySearch().
private static final String[] EMPTY_ELEMENTS = {"area", "base", "br", "col",
    "hr", "img", "input", "link", "meta", "p", "param"};
0921
0922 private Element popElement(String s) {
0923 int flushedBytes = flushPlainText();
0924 Element currEl = m_currentElement;
0925 while(currEl.getParentElement() != null) {
0926 if(currEl.getName().equals(s) && !currEl.isRootElement()) {
0927 m_currentElement = currEl.getParentElement();
0928 //
0929 // Check if it's okay for this element to be empty. Then we will
0930 // trick the JDOM generator into not generating an empty element,
0931 // by putting an empty string between the tags. Yes, it's a kludge
0932 // but what'cha gonna do about it. :-)
0933 //
0934 if(flushedBytes == 0 && Arrays.binarySearch(EMPTY_ELEMENTS, s) < 0) {
0935 currEl.addContent("");
0936 }
0937 return m_currentElement;
0938 }
0939 currEl = currEl.getParentElement();
0940 }
0941 return m_currentElement;
0942 }
0943
0944 /**
0945 * Reads the stream until it meets one of the specified ending characters, or
0946 * stream end. The ending character will be left in the stream.
0947 */
0948 private String readUntil(String endChars) throws IOException {
0949 StringBuffer sb = new StringBuffer(80);
0950 int ch = nextToken();
0951 while(ch != -1) {
0952 if(ch == '\\') {
0953 ch = nextToken();
0954 if(ch == -1) {
0955 break;
0956 }
0957 } else {
0958 if(endChars.indexOf((char)ch) != -1) {
0959 pushBack(ch);
0960 break;
0961 }
0962 }
0963 sb.append((char)ch);
0964 ch = nextToken();
0965 }
0966 return sb.toString();
0967 }
0968
0969 /**
0970 * Reads the stream while the characters that have been specified are in the
0971 * stream, returning then the result as a String.
0972 */
0973 private String readWhile(String endChars) throws IOException {
0974 StringBuffer sb = new StringBuffer(80);
0975 int ch = nextToken();
0976 while(ch != -1) {
0977 if(endChars.indexOf((char)ch) == -1) {
0978 pushBack(ch);
0979 break;
0980 }
0981 sb.append((char)ch);
0982 ch = nextToken();
0983 }
0984 return sb.toString();
0985 }
0986
0987 private JSPWikiMarkupParser m_cleanTranslator;
0988
0989 /**
0990 * Does a lazy init. Otherwise, we would get into a situation where
0991 * HTMLRenderer would try and boot a TranslatorReader before the
0992 * TranslatorReader it is contained by is up.
0993 */
0994 private JSPWikiMarkupParser getCleanTranslator() {
0995 if(m_cleanTranslator == null) {
0996 m_cleanTranslator = new JSPWikiMarkupParser(null);
0997 m_cleanTranslator.m_allowHTML = true;
0998 }
0999 return m_cleanTranslator;
1000 }
1001
1002 /**
1003 * Modifies the "hd" parameter to contain proper values. Because an "id" tag
1004 * may only contain [a-zA-Z0-9:_-], we'll replace the % after url encoding
1005 * with '_'.
1006 */
1007 // FIXME: This method should probably be public and in an util class somewhere
1008 private String makeHeadingAnchor(String baseName, String title, Heading hd) {
1009 hd.m_titleText = title;
1010 title = cleanLink(title);
1011 hd.m_titleSection = title;
1012 hd.m_titleAnchor = "section-" + baseName + "-" + hd.m_titleSection;
1013 hd.m_titleAnchor = hd.m_titleAnchor.replace('%', '_');
1014 hd.m_titleAnchor = hd.m_titleAnchor.replace('/', '_');
1015 return hd.m_titleAnchor;
1016 }
1017
1018 private String makeSectionTitle(String title) {
1019 title = title.trim();
1020 String outTitle;
1021 try {
1022 JSPWikiMarkupParser dtr = getCleanTranslator();
1023 dtr.setInputReader(new StringReader(title));
1024 CleanTextRenderer ctt = new CleanTextRenderer(/* m_context, */dtr.parse());
1025 outTitle = ctt.getString();
1026 } catch(IOException e) {
1027 log.fatal("CleanTranslator not working", e);
1028 throw new RuntimeException(
1029 "CleanTranslator not working as expected, when cleaning title"
1030 + e.getMessage());
1031 }
1032 return outTitle;
1033 }
1034
1035 /**
1036 * Returns XHTML for the start of the heading. Also sets the line-end emitter.
1037 *
1038 * @param level
1039 * @param title
1040 * the title for the heading
1041 * @param hd
1042 * a List to which heading should be added
1043 */
1044 public Element makeHeading(int level, String pageName, String title,
1045 Heading hd) {
1046 Element el = null;
1047 String outTitle = makeSectionTitle(title);
1048 hd.m_level = level;
1049 switch(level){
1050 case Heading.HEADING_SMALL:
1051 el = new Element("h4").setAttribute("id", makeHeadingAnchor(pageName,
1052 outTitle, hd));
1053 break;
1054 case Heading.HEADING_MEDIUM:
1055 el = new Element("h3").setAttribute("id", makeHeadingAnchor(pageName,
1056 outTitle, hd));
1057 break;
1058 case Heading.HEADING_LARGE:
1059 el = new Element("h2").setAttribute("id", makeHeadingAnchor(pageName,
1060 outTitle, hd));
1061 break;
1062 }
1063 return el;
1064 }
1065
1066 /**
1067 * When given a link to a WikiName, we just return a proper HTML link for it.
1068 * The local link mutator chain is also called.
1069 */
1070 private Element makeCamelCaseLink(String wikiname) {
1071 String matchedLink;
1072 // callMutatorChain( m_localLinkMutatorChain, wikiname );
1073 if((matchedLink = linkExists(wikiname)) != null) {
1074 makeLink(READ, matchedLink, wikiname, null);
1075 } else {
1076 makeLink(EDIT, wikiname, wikiname, null);
1077 }
1078 return m_currentElement;
1079 }
1080
1081 /** Holds the image URL for the duration of this parser */
1082 private String m_outlinkImageURL = null;
1083
1084 /**
1085 * Returns an element for the external link image (out.png). However, this
1086 * method caches the URL for the lifetime of this MarkupParser, because it's
1087 * commonly used, and we'll end up with possibly hundreds our thousands of
1088 * references to it... It's a lot faster, too.
1089 *
1090 * @return An element containing the HTML for the outlink image.
1091 */
1092 private Element outlinkImage() {
1093 Element el = null;
1094 if(m_useOutlinkImage) {
1095 if(m_outlinkImageURL == null) {
1096 m_outlinkImageURL = OUTLINK_IMAGE;
1097 }
1098 el = new Element("img").setAttribute("class", "outlink");
1099 el.setAttribute("src", m_outlinkImageURL);
1100 el.setAttribute("alt", "");
1101 }
1102 return el;
1103 }
1104
1105 /**
1106 * Takes an URL and turns it into a regular wiki link. Unfortunately, because
1107 * of the way that flushPlainText() works, it already encodes all of the XML
1108 * entities. But so does WikiContext.getURL(), so we have to do a
1109 * reverse-replace here, so that it can again be replaced in makeLink.
1110 * <p>
1111 * What a crappy problem.
1112 *
1113 * @param url
1114 * @return
1115 */
1116 private Element makeDirectURILink(String url) {
1117 Element result;
1118 String last = null;
1119 if(url.endsWith(",") || url.endsWith(".")) {
1120 last = url.substring(url.length() - 1);
1121 url = url.substring(0, url.length() - 1);
1122 }
1123 // callMutatorChain( m_externalLinkMutatorChain, url );
1124 if(isImageLink(url)) {
1125 result = handleImageLink(StringUtils.replace(url, "&", "&"), url,
1126 false);
1127 } else {
1128 result = makeLink(EXTERNAL, StringUtils.replace(url, "&", "&"), url,
1129 null);
1130 addElement(outlinkImage());
1131 }
1132 if(last != null) {
1133 m_plainTextBuf.append(last);
1134 }
1135 return result;
1136 }
1137
1138 /**
1139 * Image links are handled differently: 1. If the text is a WikiName of an
1140 * existing page, it gets linked. 2. If the text is an external link, then it
1141 * is inlined. 3. Otherwise it becomes an ALT text.
1142 *
1143 * @param reallink
1144 * The link to the image.
1145 * @param link
1146 * Link text portion, may be a link to somewhere else.
1147 * @param hasLinkText
1148 * If true, then the defined link had a link text available. This
1149 * means that the link text may be a link to a wiki page, or an
1150 * external resource.
1151 */
1152 // FIXME: isExternalLink() is called twice.
1153 private Element handleImageLink(String reallink, String link,
1154 boolean hasLinkText) {
1155 String possiblePage = cleanLink(link);
1156 if(isExternalLink(link) && hasLinkText) {
1157 return makeLink(IMAGELINK, reallink, link, null);
1158 } else if((linkExists(possiblePage)) != null && hasLinkText) {
1159 // System.out.println("Orig="+link+", Matched: "+matchedLink);
1160 // callMutatorChain( m_localLinkMutatorChain, possiblePage );
1161 return makeLink(IMAGEWIKILINK, reallink, link, null);
1162 } else {
1163 return makeLink(IMAGE, reallink, link, null);
1164 }
1165 }
1166
1167 // private Element handleAccessRule( String ruleLine )
1168 // {
1169 // if( !m_parseAccessRules ) return m_currentElement;
1170 // Acl acl;
1171 // WikiPage page = m_context.getPage();
1172 // // UserDatabase db = m_context.getEngine().getUserDatabase();
1173 //
1174 // if( ruleLine.startsWith( "{" ) )
1175 // ruleLine = ruleLine.substring( 1 );
1176 // if( ruleLine.endsWith( "}" ) )
1177 // ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1178 //
1179 // log.debug("page="+page.getName()+", ACL = "+ruleLine);
1180 //
1181 // try
1182 // {
1183 // acl = m_engine.getAclManager().parseAcl( page, ruleLine );
1184 //
1185 // page.setAcl( acl );
1186 //
1187 // log.debug( acl.toString() );
1188 // }
1189 // catch( WikiSecurityException wse )
1190 // {
1191 // return makeError( wse.getMessage() );
1192 // }
1193 //
1194 // return m_currentElement;
1195 // }
1196 /**
1197 * Handles metadata setting [{SET foo=bar}]
1198 */
1199 private Element handleMetadata(String link) {
1200 try {
1201 String args = link.substring(link.indexOf(' '), link.length() - 1);
1202 String name = args.substring(0, args.indexOf('='));
1203 String val = args.substring(args.indexOf('=') + 1, args.length());
1204 name = name.trim();
1205 val = val.trim();
1206 if(val.startsWith("'")) val = val.substring(1);
1207 if(val.endsWith("'")) val = val.substring(0, val.length() - 1);
1208 // log.debug("SET name='"+name+"', value='"+val+"'.");
1209 if(name.length() > 0 && val.length() > 0) {
1210 // val = m_engine.getVariableManager().expandVariables( m_context,
1211 // val );
1212 //
1213 // m_context.getPage().setAttribute( name, val );
1214 }
1215 } catch(Exception e) {
1216 return makeError(" Invalid SET found: " + link);
1217 }
1218 return m_currentElement;
1219 }
1220
1221 /**
1222 * Emits a processing instruction that will disable markup escaping. This is
1223 * very useful if you want to emit HTML directly into the stream.
1224 *
1225 */
1226 private void disableOutputEscaping() {
1227 addElement(new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, ""));
1228 }
1229
1230 /**
1231 * Gobbles up all hyperlinks that are encased in square brackets.
1232 */
1233 private Element handleHyperlinks(String link, int pos) {
1234 StringBuffer sb = new StringBuffer(link.length() + 80);
1235 String reallink;
1236 int cutpoint;
1237 if(isAccessRule(link)) {
1238 // return handleAccessRule( link );
1239 return null;
1240 }
1241 if(isMetadata(link)) { return handleMetadata(link); }
1242 // if( PluginManager.isPluginLink( link ) )
1243 // {
1244 // try
1245 // {
1246 // Content pluginContent = m_engine.getPluginManager().parsePluginLine(
1247 // m_context, link, pos );
1248 //
1249 // addElement( pluginContent );
1250 // }
1251 // catch( PluginException e )
1252 // {
1253 // log.info( "Failed to insert plugin", e );
1254 // log.info( "Root cause:",e.getRootThrowable() );
1255 // return addElement( makeError("Plugin insertion failed: "+e.getMessage())
1256 // );
1257 // }
1258 //
1259 // return m_currentElement;
1260 // }
1261 // link = TextUtil.replaceEntities( link );
1262 if((cutpoint = link.indexOf('|')) != -1) {
1263 reallink = link.substring(cutpoint + 1).trim();
1264 link = link.substring(0, cutpoint);
1265 } else {
1266 reallink = link.trim();
1267 }
1268 int interwikipoint = -1;
1269 //
1270 // Yes, we now have the components separated.
1271 // link = the text the link should have
1272 // reallink = the url or page name.
1273 //
1274 // In many cases these are the same. [link|reallink].
1275 //
1276 // if( VariableManager.isVariableLink( link ) )
1277 // {
1278 // Content el = new VariableContent(link);
1279 //
1280 // addElement( el );
1281 // }
1282 // else
1283 if(isExternalLink(reallink)) {
1284 // It's an external link, out of this Wiki
1285 // callMutatorChain( m_externalLinkMutatorChain, reallink );
1286 if(isImageLink(reallink)) {
1287 handleImageLink(reallink, link, (cutpoint != -1));
1288 } else {
1289 makeLink(EXTERNAL, reallink, link, null);
1290 addElement(outlinkImage());
1291 }
1292 }
1293 // else if( (interwikipoint = reallink.indexOf(":")) != -1 )
1294 // {
1295 // // It's an interwiki link
1296 // // InterWiki links also get added to external link chain
1297 // // after the links have been resolved.
1298 //
1299 // // FIXME: There is an interesting issue here: We probably should
1300 // // URLEncode the wikiPage, but we can't since some of the
1301 // // Wikis use slashes (/), which won't survive URLEncoding.
1302 // // Besides, we don't know which character set the other Wiki
1303 // // is using, so you'll have to write the entire name as it appears
1304 // // in the URL. Bugger.
1305 //
1306 // String extWiki = reallink.substring( 0, interwikipoint );
1307 // String wikiPage = reallink.substring( interwikipoint+1 );
1308 //
1309 // String urlReference = m_engine.getInterWikiURL( extWiki );
1310 //
1311 // if( urlReference != null )
1312 // {
1313 // urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1314 // callMutatorChain( m_externalLinkMutatorChain, urlReference );
1315 //
1316 // if( isImageLink(urlReference) )
1317 // {
1318 // handleImageLink( urlReference, link, cutpoint != -1 );
1319 // }
1320 // else
1321 // {
1322 // makeLink( INTERWIKI, urlReference, link, null );
1323 // }
1324 //
1325 // if( isExternalLink(urlReference) )
1326 // {
1327 // addElement( outlinkImage() );
1328 // }
1329 // }
1330 // else
1331 // {
1332 // addElement( makeError("No InterWiki reference defined in properties for
1333 // Wiki called '"+extWiki+"'!)") );
1334 // }
1335 // }
1336 else if(reallink.startsWith("#")) {
1337 // It defines a local footnote
1338 makeLink(LOCAL, reallink, link, null);
1339 } else if(isNumber(reallink)) {
1340 // It defines a reference to a local footnote
1341 makeLink(LOCALREF, reallink, link, null);
1342 } else {
1343 int hashMark = -1;
1344 //
1345 // Internal wiki link, but is it an attachment link?
1346 //
1347 String attachment = findAttachment(reallink);
1348 if(attachment != null) {
1349 // callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1350 if(isImageLink(reallink)) {
1351 attachment = "ATTACH" + attachment;
1352 sb.append(handleImageLink(attachment, link, (cutpoint != -1)));
1353 } else {
1354 makeLink(ATTACHMENT, attachment, link, null);
1355 }
1356 } else if((hashMark = reallink.indexOf('#')) != -1) {
1357 // It's an internal Wiki link, but to a named section
1358 String namedSection = reallink.substring(hashMark + 1);
1359 reallink = reallink.substring(0, hashMark);
1360 reallink = cleanLink(reallink);
1361 // callMutatorChain( m_localLinkMutatorChain, reallink );
1362 String matchedLink;
1363 if((matchedLink = linkExists(reallink)) != null) {
1364 String sectref = "section-" + namedSection;
1365 sectref = sectref.replace('%', '_');
1366 makeLink(READ, matchedLink, link, sectref);
1367 } else {
1368 makeLink(EDIT, reallink, link, null);
1369 }
1370 } else {
1371 // It's an internal Wiki link
1372 reallink = cleanLink(reallink);
1373 // callMutatorChain( m_localLinkMutatorChain, reallink );
1374 String matchedLink = linkExists(reallink);
1375 if(matchedLink != null) {
1376 makeLink(READ, matchedLink, link, null);
1377 } else {
1378 makeLink(EDIT, reallink, link, null);
1379 }
1380 }
1381 }
1382 return m_currentElement;
1383 }
1384
1385 private String findAttachment(String link) {
1386 // AttachmentManager mgr = m_engine.getAttachmentManager();
1387 // Attachment att = null;
1388 //
1389 // try
1390 // {
1391 // att = mgr.getAttachmentInfo( m_context, link );
1392 // }
1393 // catch( ProviderException e )
1394 // {
1395 // log.warn("Finding attachments failed: ",e);
1396 // return null;
1397 // }
1398 //
1399 // if( att != null )
1400 // {
1401 // return att.getName();
1402 // }
1403 // else if( link.indexOf('/') != -1 )
1404 // {
1405 // return link;
1406 // }
1407 //
1408 return null;
1409 }
1410
1411 /**
1412 * Pushes back any string that has been read. It will obviously be pushed back
1413 * in a reverse order.
1414 *
1415 * @since 2.1.77
1416 */
1417 private void pushBack(String s) throws IOException {
1418 for(int i = s.length() - 1; i >= 0; i--) {
1419 pushBack(s.charAt(i));
1420 }
1421 }
1422
1423 private Element handleBackslash() throws IOException {
1424 int ch = nextToken();
1425 if(ch == '\\') {
1426 int ch2 = nextToken();
1427 if(ch2 == '\\') {
1428 pushElement(new Element("br").setAttribute("clear", "all"));
1429 return popElement("br");
1430 }
1431 pushBack(ch2);
1432 pushElement(new Element("br"));
1433 return popElement("br");
1434 }
1435 pushBack(ch);
1436 return null;
1437 }
1438
1439 private Element handleUnderscore() throws IOException {
1440 int ch = nextToken();
1441 Element el = null;
1442 if(ch == '_') {
1443 if(m_isbold) {
1444 el = popElement("b");
1445 } else {
1446 el = pushElement(new Element("b"));
1447 }
1448 m_isbold = !m_isbold;
1449 } else {
1450 pushBack(ch);
1451 }
1452 return el;
1453 }
1454
1455 /**
1456 * For example: italics.
1457 */
1458 private Element handleApostrophe() throws IOException {
1459 int ch = nextToken();
1460 Element el = null;
1461 if(ch == '\'') {
1462 if(m_isitalic) {
1463 el = popElement("i");
1464 } else {
1465 el = pushElement(new Element("i"));
1466 }
1467 m_isitalic = !m_isitalic;
1468 } else {
1469 pushBack(ch);
1470 }
1471 return el;
1472 }
1473
1474 private Element handleOpenbrace(boolean isBlock) throws IOException {
1475 int ch = nextToken();
1476 if(ch == '{') {
1477 int ch2 = nextToken();
1478 if(ch2 == '{') {
1479 m_isPre = true;
1480 m_isEscaping = true;
1481 m_isPreBlock = isBlock;
1482 if(isBlock) {
1483 startBlockLevel();
1484 return pushElement(new Element("pre"));
1485 }
1486 return pushElement(new Element("span").setAttribute("style",
1487 "font-family:monospace; white-space:pre;"));
1488 }
1489 pushBack(ch2);
1490 return pushElement(new Element("tt"));
1491 }
1492 pushBack(ch);
1493 return null;
1494 }
1495
1496 /**
1497 * Handles both }} and }}}
1498 */
1499 private Element handleClosebrace() throws IOException {
1500 int ch2 = nextToken();
1501 if(ch2 == '}') {
1502 int ch3 = nextToken();
1503 if(ch3 == '}') {
1504 if(m_isPre) {
1505 if(m_isPreBlock) {
1506 popElement("pre");
1507 } else {
1508 popElement("span");
1509 }
1510 m_isPre = false;
1511 m_isEscaping = false;
1512 return m_currentElement;
1513 }
1514 m_plainTextBuf.append("}}}");
1515 return m_currentElement;
1516 }
1517 pushBack(ch3);
1518 if(!m_isEscaping) { return popElement("tt"); }
1519 }
1520 pushBack(ch2);
1521 return null;
1522 }
1523
1524 private Element handleDash() throws IOException {
1525 int ch = nextToken();
1526 if(ch == '-') {
1527 int ch2 = nextToken();
1528 if(ch2 == '-') {
1529 int ch3 = nextToken();
1530 if(ch3 == '-') {
1531 // Empty away all the rest of the dashes.
1532 // Do not forget to return the first non-match back.
1533 while((ch = nextToken()) == '-')
1534 ;
1535 pushBack(ch);
1536 startBlockLevel();
1537 pushElement(new Element("hr"));
1538 return popElement("hr");
1539 }
1540 pushBack(ch3);
1541 }
1542 pushBack(ch2);
1543 }
1544 pushBack(ch);
1545 return null;
1546 }
1547
1548 private Element handleHeading(String pageName) throws IOException {
1549 Element el = null;
1550 int ch = nextToken();
1551 Heading hd = new Heading();
1552 if(ch == '!') {
1553 int ch2 = nextToken();
1554 if(ch2 == '!') {
1555 String title = peekAheadLine();
1556 el = makeHeading(Heading.HEADING_LARGE, pageName, title, hd);
1557 } else {
1558 pushBack(ch2);
1559 String title = peekAheadLine();
1560 el = makeHeading(Heading.HEADING_MEDIUM, pageName, title, hd);
1561 }
1562 } else {
1563 pushBack(ch);
1564 String title = peekAheadLine();
1565 el = makeHeading(Heading.HEADING_SMALL, pageName, title, hd);
1566 }
1567 // callHeadingListenerChain( hd );
1568 if(el != null) pushElement(el);
1569 return el;
1570 }
1571
1572 /**
1573 * Reads the stream until the next EOL or EOF. Note that it will also read the
1574 * EOL from the stream.
1575 */
1576 private StringBuffer readUntilEOL() throws IOException {
1577 int ch;
1578 StringBuffer buf = new StringBuffer(256);
1579 while(true) {
1580 ch = nextToken();
1581 if(ch == -1) break;
1582 buf.append((char)ch);
1583 if(ch == '\n') break;
1584 }
1585 return buf;
1586 }
1587
1588 /** Controls whether italic is restarted after a paragraph shift */
1589 private boolean m_restartitalic = false;
1590
1591 private boolean m_restartbold = false;
1592
1593 /**
1594 * Starts a block level element, therefore closing a potential open paragraph
1595 * tag.
1596 */
1597 private void startBlockLevel() {
1598 // These may not continue over block level limits in XHTML
1599 popElement("i");
1600 popElement("b");
1601 popElement("tt");
1602 if(m_isOpenParagraph) {
1603 m_isOpenParagraph = false;
1604 popElement("p");
1605 m_plainTextBuf.append("\n"); // Just small beautification
1606 }
1607 m_restartitalic = m_isitalic;
1608 m_restartbold = m_isbold;
1609 m_isitalic = false;
1610 m_isbold = false;
1611 }
1612
1613 private static String getListType(char c) {
1614 if(c == '*') {
1615 return "ul";
1616 } else if(c == '#') { return "ol"; }
1617 throw new RuntimeException("Parser got faulty list type: " + c);
1618 }
1619
1620 /**
1621 * Like original handleOrderedList() and handleUnorderedList() however handles
1622 * both ordered ('#') and unordered ('*') mixed together.
1623 */
1624 // FIXME: Refactor this; it's a bit messy.
1625 private Element handleGeneralList() throws IOException {
1626 startBlockLevel();
1627 String strBullets = readWhile("*#");
1628 // String strBulletsRaw = strBullets; // to know what was original before
1629 // phpwiki style substitution
1630 int numBullets = strBullets.length();
1631 // override the beginning portion of bullet pattern to be like the previous
1632 // to simulate PHPWiki style lists
1633 if(m_allowPHPWikiStyleLists) {
1634 // only substitute if different
1635 if(!(strBullets.substring(0, Math.min(numBullets, m_genlistlevel))
1636 .equals(m_genlistBulletBuffer.substring(0, Math.min(numBullets,
1637 m_genlistlevel))))) {
1638 if(numBullets <= m_genlistlevel) {
1639 // Substitute all but the last character (keep the expressed bullet
1640 // preference)
1641 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0,
1642 numBullets - 1) : "")
1643 + strBullets.substring(numBullets - 1, numBullets);
1644 } else {
1645 strBullets = m_genlistBulletBuffer
1646 + strBullets.substring(m_genlistlevel, numBullets);
1647 }
1648 }
1649 }
1650 //
1651 // Check if this is still of the same type
1652 //
1653 if(strBullets.substring(0, Math.min(numBullets, m_genlistlevel)).equals(
1654 m_genlistBulletBuffer.substring(0, Math.min(numBullets,
1655 m_genlistlevel)))) {
1656 if(numBullets > m_genlistlevel) {
1657 pushElement(new Element(
1658 getListType(strBullets.charAt(m_genlistlevel++))));
1659 // buf.append( m_renderer.openList(strBullets.charAt(m_genlistlevel++))
1660 // );
1661 for(; m_genlistlevel < numBullets; m_genlistlevel++) {
1662 // bullets are growing, get from new bullet list
1663 pushElement(new Element("li"));
1664 // buf.append( m_renderer.openListItem() );
1665 pushElement(new Element(
1666 getListType(strBullets.charAt(m_genlistlevel))));
1667 // buf.append( m_renderer.openList(strBullets.charAt(m_genlistlevel))
1668 // );
1669 }
1670 } else if(numBullets < m_genlistlevel) {
1671 // Close the previous list item.
1672 // buf.append( m_renderer.closeListItem() );
1673 popElement("li");
1674 for(; m_genlistlevel > numBullets; m_genlistlevel--) {
1675 // bullets are shrinking, get from old bullet list
1676 // buf.append(
1677 // m_renderer.closeList(m_genlistBulletBuffer.charAt(m_genlistlevel -
1678 // 1)) );
1679 popElement(getListType(m_genlistBulletBuffer
1680 .charAt(m_genlistlevel - 1)));
1681 if(m_genlistlevel > 0) {
1682 // buf.append( m_renderer.closeListItem() );
1683 popElement("li");
1684 }
1685 }
1686 } else {
1687 if(m_genlistlevel > 0) {
1688 popElement("li");
1689 // buf.append( m_renderer.closeListItem() );
1690 }
1691 }
1692 } else {
1693 //
1694 // The pattern has changed, unwind and restart
1695 //
1696 int numEqualBullets;
1697 int numCheckBullets;
1698 // find out how much is the same
1699 numEqualBullets = 0;
1700 numCheckBullets = Math.min(numBullets, m_genlistlevel);
1701 while(numEqualBullets < numCheckBullets) {
1702 // if the bullets are equal so far, keep going
1703 if(strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer
1704 .charAt(numEqualBullets))
1705 numEqualBullets++;
1706 // otherwise giveup, we have found how many are equal
1707 else break;
1708 }
1709 // unwind
1710 for(; m_genlistlevel > numEqualBullets; m_genlistlevel--) {
1711 popElement(getListType(m_genlistBulletBuffer.charAt(m_genlistlevel - 1)));
1712 // buf.append( m_renderer.closeList(
1713 // m_genlistBulletBuffer.charAt(m_genlistlevel - 1) ) );
1714 if(m_genlistlevel > 0) {
1715 // buf.append( m_renderer.closeListItem() );
1716 popElement("li");
1717 }
1718 }
1719 // rewind
1720 // buf.append( m_renderer.openList( strBullets.charAt(numEqualBullets++) )
1721 // );
1722 pushElement(new Element(getListType(strBullets.charAt(numEqualBullets++))));
1723 for(int i = numEqualBullets; i < numBullets; i++) {
1724 pushElement(new Element("li"));
1725 pushElement(new Element(getListType(strBullets.charAt(i))));
1726 // buf.append( m_renderer.openListItem() );
1727 // buf.append( m_renderer.openList( strBullets.charAt(i) ) );
1728 }
1729 m_genlistlevel = numBullets;
1730 }
1731 // buf.append( m_renderer.openListItem() );
1732 pushElement(new Element("li"));
1733 // work done, remember the new bullet list (in place of old one)
1734 m_genlistBulletBuffer.setLength(0);
1735 m_genlistBulletBuffer.append(strBullets);
1736 return m_currentElement;
1737 }
1738
1739 private Element unwindGeneralList() {
1740 // unwind
1741 for(; m_genlistlevel > 0; m_genlistlevel--) {
1742 popElement("li");
1743 popElement(getListType(m_genlistBulletBuffer.charAt(m_genlistlevel - 1)));
1744 }
1745 m_genlistBulletBuffer.setLength(0);
1746 return null;
1747 }
1748
1749 private Element handleDefinitionList() throws IOException {
1750 if(!m_isdefinition) {
1751 m_isdefinition = true;
1752 startBlockLevel();
1753 pushElement(new Element("dl"));
1754 return pushElement(new Element("dt"));
1755 }
1756 return null;
1757 }
1758
1759 private Element handleOpenbracket() throws IOException {
1760 StringBuffer sb = new StringBuffer(40);
1761 int pos = getPosition();
1762 int ch;
1763 boolean isPlugin = false;
1764 while((ch = nextToken()) == '[') {
1765 sb.append((char)ch);
1766 }
1767 if(ch == '{') {
1768 isPlugin = true;
1769 }
1770 pushBack(ch);
1771 if(sb.length() > 0) {
1772 m_plainTextBuf.append(sb);
1773 return m_currentElement;
1774 }
1775 //
1776 // Find end of hyperlink
1777 //
1778 ch = nextToken();
1779 int nesting = 1; // Check for nested plugins
1780 while(ch != -1) {
1781 int ch2 = nextToken();
1782 pushBack(ch2);
1783 if(isPlugin) {
1784 if(ch == '[' && ch2 == '{') {
1785 nesting++;
1786 } else if(nesting == 0 && ch == ']'
1787 && sb.charAt(sb.length() - 1) == '}') {
1788 break;
1789 } else if(ch == '}' && ch2 == ']') {
1790 // NB: This will be decremented once at the end
1791 nesting--;
1792 }
1793 } else {
1794 if(ch == ']') {
1795 break;
1796 }
1797 }
1798 sb.append((char)ch);
1799 ch = nextToken();
1800 }
1801 //
1802 // If the link is never finished, do some tricks to display the rest of the
1803 // line
1804 // unchanged.
1805 //
1806 if(ch == -1) {
1807 log.debug("Warning: unterminated link detected!");
1808 m_isEscaping = true;
1809 m_plainTextBuf.append(sb);
1810 flushPlainText();
1811 m_isEscaping = false;
1812 return m_currentElement;
1813 }
1814 return handleHyperlinks(sb.toString(), pos);
1815 }
1816
1817 /**
1818 * Reads the stream until the current brace is closed or stream end.
1819 */
1820 private String readBraceContent(char opening, char closing)
1821 throws IOException {
1822 StringBuffer sb = new StringBuffer(40);
1823 int braceLevel = 1;
1824 int ch;
1825 while((ch = nextToken()) != -1) {
1826 if(ch == '\\') {
1827 continue;
1828 } else if(ch == opening) {
1829 braceLevel++;
1830 } else if(ch == closing) {
1831 braceLevel--;
1832 if(braceLevel == 0) {
1833 break;
1834 }
1835 }
1836 sb.append((char)ch);
1837 }
1838 return sb.toString();
1839 }
1840
1841 /**
1842 * Handles constructs of type %%(style) and %%class
1843 *
1844 * @param newLine
1845 * @return
1846 * @throws IOException
1847 */
1848 private Element handleDiv(boolean newLine) throws IOException {
1849 int ch = nextToken();
1850 Element el = null;
1851 if(ch == '%') {
1852 String style = null;
1853 String clazz = null;
1854 ch = nextToken();
1855 //
1856 // Style or class?
1857 //
1858 if(ch == '(') {
1859 style = readBraceContent('(', ')');
1860 } else if(Character.isLetter((char)ch)) {
1861 pushBack(ch);
1862 clazz = readUntil(" \t\n\r");
1863 ch = nextToken();
1864 //
1865 // Pop out only spaces, so that the upcoming EOL check does not check
1866 // the
1867 // next line.
1868 //
1869 if(ch == '\n' || ch == '\r') {
1870 pushBack(ch);
1871 }
1872 } else {
1873 //
1874 // Anything else stops.
1875 //
1876 pushBack(ch);
1877 try {
1878 Boolean isSpan = (Boolean)m_styleStack.pop();
1879 if(isSpan == null) {
1880 // Fail quietly
1881 } else if(isSpan.booleanValue()) {
1882 el = popElement("span");
1883 } else {
1884 el = popElement("div");
1885 }
1886 } catch(EmptyStackException e) {
1887 log.debug("Page '" + "' closes a %%-block that has not been opened.");
1888 return m_currentElement;
1889 }
1890 return el;
1891 }
1892 //
1893 // Check if there is an attempt to do something nasty
1894 //
1895 style = StringEscapeUtils.unescapeHtml(style);
1896 if(style != null && style.indexOf("javascript:") != -1) {
1897 log.debug("Attempt to output javascript within CSS:" + style);
1898 return addElement(makeError("Attempt to output javascript!"));
1899 }
1900 //
1901 // Decide if we should open a div or a span?
1902 //
1903 String eol = peekAheadLine();
1904 if(eol.trim().length() > 0) {
1905 // There is stuff after the class
1906 el = new Element("span");
1907 m_styleStack.push(Boolean.TRUE);
1908 } else {
1909 startBlockLevel();
1910 el = new Element("div");
1911 m_styleStack.push(Boolean.FALSE);
1912 }
1913 if(style != null) el.setAttribute("style", style);
1914 if(clazz != null) el.setAttribute("class", clazz);
1915 el = pushElement(el);
1916 return el;
1917 }
1918 pushBack(ch);
1919 return el;
1920 }
1921
1922 private Element handleSlash(boolean newLine) throws IOException {
1923 int ch = nextToken();
1924 pushBack(ch);
1925 if(ch == '%' && !m_styleStack.isEmpty()) { return handleDiv(newLine); }
1926 return null;
1927 }
1928
1929 private Element handleBar(boolean newLine) throws IOException {
1930 Element el = null;
1931 if(!m_istable && !newLine) { return null; }
1932 if(newLine) {
1933 if(!m_istable) {
1934 startBlockLevel();
1935 el = pushElement(new Element("table")
1936 .setAttribute("class", "wikitable").setAttribute("border", "1"));
1937 m_istable = true;
1938 m_rowNum = 0;
1939 }
1940 m_rowNum++;
1941 Element tr = (m_rowNum % 2 != 0) ? new Element("tr").setAttribute(
1942 "class", "odd") : new Element("tr");
1943 el = pushElement(tr);
1944 // m_closeTag = m_renderer.closeTableItem()+m_renderer.closeTableRow();
1945 }
1946 int ch = nextToken();
1947 if(ch == '|') {
1948 if(!newLine) {
1949 el = popElement("th");
1950 }
1951 el = pushElement(new Element("th"));
1952 } else {
1953 if(!newLine) {
1954 el = popElement("td");
1955 }
1956 el = pushElement(new Element("td"));
1957 pushBack(ch);
1958 }
1959 return el;
1960 }
1961
1962 /**
1963 * Generic escape of next character or entity.
1964 */
1965 private Element handleTilde() throws IOException {
1966 int ch = nextToken();
1967 if(ch == ' ') return m_currentElement;
1968 if(ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#'
1969 || ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '['
1970 || ch == '{' || ch == ']' || ch == '}' || ch == '%') {
1971 m_plainTextBuf.append((char)ch);
1972 m_plainTextBuf.append(readWhile("" + (char)ch));
1973 return m_currentElement;
1974 }
1975 // No escape.
1976 pushBack(ch);
1977 return null;
1978 }
1979
1980 private void fillBuffer(Element startElement) throws IOException {
1981 m_currentElement = startElement;
1982 boolean quitReading = false;
1983 boolean newLine = true; // FIXME: not true if reading starts in middle of
1984 // buffer
1985 disableOutputEscaping();
1986 while(!quitReading) {
1987 int ch = nextToken();
1988 Element el = null;
1989 //
1990 // Check if we're actually ending the preformatted mode.
1991 // We still must do an entity transformation here.
1992 //
1993 if(m_isEscaping) {
1994 if(ch == '}') {
1995 if(handleClosebrace() == null) m_plainTextBuf.append((char)ch);
1996 } else if(ch == -1) {
1997 quitReading = true;
1998 } else if(ch == '\r') {
1999 // DOS line feeds we ignore.
2000 } else if(ch == '<') {
2001 m_plainTextBuf.append("<");
2002 } else if(ch == '>') {
2003 m_plainTextBuf.append(">");
2004 } else if(ch == '&') {
2005 m_plainTextBuf.append("&");
2006 } else if(ch == '~') {
2007 String braces = readWhile("}");
2008 if(braces.length() >= 3) {
2009 m_plainTextBuf.append("}}}");
2010 braces = braces.substring(3);
2011 } else {
2012 m_plainTextBuf.append((char)ch);
2013 }
2014 for(int i = braces.length() - 1; i >= 0; i--) {
2015 pushBack(braces.charAt(i));
2016 }
2017 } else {
2018 m_plainTextBuf.append((char)ch);
2019 }
2020 continue;
2021 }
2022 //
2023 // An empty line stops a list
2024 //
2025 if(newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0) {
2026 m_plainTextBuf.append(unwindGeneralList());
2027 }
2028 if(newLine && ch != '|' && m_istable) {
2029 el = popElement("table");
2030 m_istable = false;
2031 }
2032 //
2033 // Now, check the incoming token.
2034 //
2035 switch(ch){
2036 case '\r':
2037 // DOS linefeeds we forget
2038 continue;
2039 case '\n':
2040 //
2041 // Close things like headings, etc.
2042 //
2043 // FIXME: This is not really very fast
2044 popElement("dl"); // Close definition lists.
2045 popElement("h2");
2046 popElement("h3");
2047 popElement("h4");
2048 if(m_istable) {
2049 popElement("tr");
2050 }
2051 m_isdefinition = false;
2052 if(newLine) {
2053 // Paragraph change.
2054 startBlockLevel();
2055 //
2056 // Figure out which elements cannot be enclosed inside
2057 // a <p></p> pair according to XHTML rules.
2058 //
2059 String nextLine = peekAheadLine();
2060 if(nextLine.length() == 0
2061 || (nextLine.length() > 0 && !nextLine.startsWith("{{{")
2062 && !nextLine.startsWith("----")
2063 && !nextLine.startsWith("%%") && "*#!;"
2064 .indexOf(nextLine.charAt(0)) == -1)) {
2065 pushElement(new Element("p"));
2066 m_isOpenParagraph = true;
2067 if(m_restartitalic) {
2068 pushElement(new Element("i"));
2069 m_isitalic = true;
2070 m_restartitalic = false;
2071 }
2072 if(m_restartbold) {
2073 pushElement(new Element("b"));
2074 m_isbold = true;
2075 m_restartbold = false;
2076 }
2077 }
2078 } else {
2079 m_plainTextBuf.append("\n");
2080 newLine = true;
2081 }
2082 continue;
2083 case '\\':
2084 el = handleBackslash();
2085 break;
2086 case '_':
2087 el = handleUnderscore();
2088 break;
2089 case '\'':
2090 el = handleApostrophe();
2091 break;
2092 case '{':
2093 el = handleOpenbrace(newLine);
2094 break;
2095 case '}':
2096 el = handleClosebrace();
2097 break;
2098 case '-':
2099 if(newLine) el = handleDash();
2100 break;
2101 case '!':
2102 if(newLine) {
2103 el = handleHeading(null);
2104 }
2105 break;
2106 case ';':
2107 if(newLine) {
2108 el = handleDefinitionList();
2109 }
2110 break;
2111 case ':':
2112 if(m_isdefinition) {
2113 popElement("dt");
2114 el = pushElement(new Element("dd"));
2115 m_isdefinition = false;
2116 }
2117 break;
2118 case '[':
2119 el = handleOpenbracket();
2120 break;
2121 case '*':
2122 if(newLine) {
2123 pushBack('*');
2124 el = handleGeneralList();
2125 }
2126 break;
2127 case '#':
2128 if(newLine) {
2129 pushBack('#');
2130 el = handleGeneralList();
2131 }
2132 break;
2133 case '|':
2134 el = handleBar(newLine);
2135 break;
2136 case '~':
2137 el = handleTilde();
2138 break;
2139 case '%':
2140 el = handleDiv(newLine);
2141 break;
2142 case '/':
2143 el = handleSlash(newLine);
2144 break;
2145 case -1:
2146 quitReading = true;
2147 continue;
2148 }
2149 //
2150 // The idea is as follows: If the handler method returns
2151 // an element (el != null), it is assumed that it has been
2152 // added in the stack. Otherwise the character is added
2153 // as is to the plaintext buffer.
2154 //
2155 // For the transition phase, if s != null, it also gets
2156 // added in the plaintext buffer.
2157 //
2158 if(el != null) {
2159 newLine = false;
2160 } else {
2161 m_plainTextBuf.append((char)ch);
2162 newLine = false;
2163 }
2164 }
2165 popElement("domroot");
2166 }
2167
2168 public Document parse() throws IOException {
2169 Element rootElement = new Element("html");
2170 Element headElem = new Element("head");
2171 rootElement.addContent(headElem);
2172 Element bodyElem = new Element("body");
2173 rootElement.addContent(bodyElem);
2174 Document d = new Document(rootElement);
2175 try {
2176 fillBuffer(bodyElem);
2177 } catch(IllegalDataException e) {
2178 log.error("Page "
2179 + " contained something that cannot be added in the DOM tree", e);
2180 throw new IOException("Illegal page data: " + e.getMessage());
2181 }
2182 return d;
2183 }
2184
2185 /**
2186 * Compares two Strings, and if one starts with the other, then returns null.
2187 * Otherwise just like the normal Comparator for strings.
2188 *
2189 * @author jalkanen
2190 *
2191 * @since
2192 */
2193 private static class StartingComparator implements Comparator {
2194 public int compare(Object arg0, Object arg1) {
2195 String s1 = (String)arg0;
2196 String s2 = (String)arg1;
2197 if(s1.length() > s2.length()) {
2198 if(s1.startsWith(s2) && s2.length() > 1) return 0;
2199 } else {
2200 if(s2.startsWith(s1) && s1.length() > 1) return 0;
2201 }
2202 return s1.compareTo(s2);
2203 }
2204 }
2205
2206 private static class Heading {
2207 public static final int HEADING_SMALL = 1;
2208
2209 public static final int HEADING_MEDIUM = 2;
2210
2211 public static final int HEADING_LARGE = 3;
2212
2213 public int m_level;
2214
2215 public String m_titleText;
2216
2217 public String m_titleAnchor;
2218
2219 public String m_titleSection;
2220 }
2221
2222 private static class CleanTextRenderer {
2223 protected static final Logger log = Logger
2224 .getLogger(CleanTextRenderer.class);
2225
2226 protected Document m_document;
2227
2228 public CleanTextRenderer(/* WikiContext context, */Document doc) {
2229 this.m_document = doc;
2230 }
2231
2232 public String getString() throws IOException {
2233 StringBuffer sb = new StringBuffer();
2234 try {
2235 XPath xp = XPath.newInstance("//text()");
2236 List nodes = xp.selectNodes(m_document.getDocument());
2237 for(Iterator i = nodes.iterator(); i.hasNext();) {
2238 Object el = i.next();
2239 if(el instanceof Text) {
2240 sb.append(((Text)el).getValue());
2241 }
2242 }
2243 } catch(JDOMException e) {
2244 log.error("Could not parse XPATH expression");
2245 throw new IOException(e.getMessage());
2246 }
2247 return sb.toString();
2248 }
2249 }
2250 }
|