gate/yam/convert/JSPWikiMarkupParser.java


JSPWikiMarkupParser.java


0001 /* 

0002     JSPWikiMarkupParser.java

0003 

0004     This is a severely hacked-up version of the original class from 

0005     <a href="http://jspwiki.org/">JSPWiki</a>. It is used to perform a simple 

0006     conversion to HTML for content using JSPWiki format. 

0007     

0008     JSPWiki - a JSP-based WikiWiki clone.

0009 

0010     Copyright (C) 2001-2005 Janne Jalkanen (Janne.Jalkanen@iki.fi)

0011 

0012     This program is free software; you can redistribute it and/or modify

0013     it under the terms of the GNU Lesser General Public License as published by

0014     the Free Software Foundation; either version 2.1 of the License, or

0015     (at your option) any later version.

0016 

0017     This program is distributed in the hope that it will be useful,

0018     but WITHOUT ANY WARRANTY; without even the implied warranty of

0019     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

0020     GNU Lesser General Public License for more details.

0021 

0022     You should have received a copy of the GNU Lesser General Public License

0023     along with this program; if not, write to the Free Software

0024     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

0025 */

0026 

0027 package gate.yam.convert;

0028 

0029 import java.io.*;

0030 import java.util.*;

0031 

0032 import javax.xml.transform.Result;

0033 

0034 import org.apache.commons.lang.StringEscapeUtils;

0035 import org.apache.commons.lang.StringUtils;

0036 import org.apache.log4j.Logger;

0037 import org.apache.oro.text.GlobCompiler;

0038 import org.apache.oro.text.regex.*;

0039 import org.jdom.*;

0040 import org.jdom.xpath.XPath;

0041 

0042 /**

0043  * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the

0044  * heart and soul of JSPWiki : make sure you test properly anything that is

0045  * added, or else it breaks down horribly.

0046  * 

0047  * @author Janne Jalkanen

0048  * @since 2.4

0049  */

0050 public class JSPWikiMarkupParser{

0051 ///////////////////////////MARKUP PARSER

0052   /** Allow this many characters to be pushed back in the stream.  In effect,

0053   this limits the size of a single line.  */

0054 protected static final int              PUSHBACK_BUFFER_SIZE = 10*1024;

0055 protected PushbackReader                m_in;

0056 private int              m_pos = -1; // current position in reader stream

0057 

0058 //protected WikiEngine     m_engine;

0059 //protected WikiContext    m_context;

0060 

0061 /** Optionally stores internal wikilinks */

0062 protected ArrayList      m_localLinkMutatorChain    = new ArrayList();

0063 protected ArrayList      m_externalLinkMutatorChain = new ArrayList();

0064 protected ArrayList      m_attachmentLinkMutatorChain = new ArrayList();

0065 protected ArrayList      m_headingListenerChain     = new ArrayList();

0066 protected ArrayList      m_linkMutators             = new ArrayList();

0067 

0068 protected boolean        m_inlineImages             = true;

0069 

0070 protected boolean        m_parseAccessRules = true;

0071 /** If set to "true", allows using raw HTML within Wiki text.  Be warned,

0072   this is a VERY dangerous option to set - never turn this on in a publicly

0073   accessible Wiki, unless you are absolutely certain of what you're doing. */

0074 public static final String     PROP_ALLOWHTML        = "jspwiki.translatorReader.allowHTML";

0075 /** If set to "true", enables plugins during parsing */

0076 public static final String     PROP_RUNPLUGINS       = "jspwiki.translatorReader.runPlugins";

0077 

0078 /** Lists all punctuation characters allowed in WikiMarkup. These

0079   will not be cleaned away. */

0080 

0081 protected static final String           PUNCTUATION_CHARS_ALLOWED = "._";

0082 

0083 /**

0084 *  Replaces the current input character stream with a new one.

0085 *  @param in New source for input.  If null, this method does nothing.

0086 *  @return the old stream

0087 */

0088 public Reader setInputReader( Reader in )

0089 {

0090   Reader old = m_in;

0091 

0092   if( in != null )

0093   {

0094       m_in = new PushbackReader( new BufferedReader( in ),

0095                                  PUSHBACK_BUFFER_SIZE );

0096   }

0097 

0098   return old;

0099 }

0100 

0101 ///**

0102 //*  Adds a hook for processing link texts.  This hook is called

0103 //*  when the link text is written into the output stream, and

0104 //*  you may use it to modify the text.  It does not affect the

0105 //*  actual link, only the user-visible text.

0106 //*

0107 //*  @param mutator The hook to call.  Null is safe.

0108 //*/

0109 //public void addLinkTransmutator( StringTransmutator mutator )

0110 //{

0111 //  if( mutator != null )

0112 //  {

0113 //      m_linkMutators.add( mutator );

0114 //  }

0115 //}

0116 

0117 ///**

0118 //*  Adds a hook for processing local links.  The engine

0119 //*  transforms both non-existing and existing page links.

0120 //*

0121 //*  @param mutator The hook to call.  Null is safe.

0122 //*/

0123 //public void addLocalLinkHook( StringTransmutator mutator )

0124 //{

0125 //  if( mutator != null )

0126 //  {

0127 //      m_localLinkMutatorChain.add( mutator );

0128 //  }

0129 //}

0130 //

0131 ///**

0132 //*  Adds a hook for processing external links.  This includes

0133 //*  all http:// ftp://, etc. links, including inlined images.

0134 //*

0135 //*  @param mutator The hook to call.  Null is safe.

0136 //*/

0137 //public void addExternalLinkHook( StringTransmutator mutator )

0138 //{

0139 //  if( mutator != null )

0140 //  {

0141 //      m_externalLinkMutatorChain.add( mutator );

0142 //  }

0143 //}

0144 //

0145 ///**

0146 //*  Adds a hook for processing attachment links.

0147 //*

0148 //*  @param mutator The hook to call.  Null is safe.

0149 //*/

0150 //public void addAttachmentLinkHook( StringTransmutator mutator )

0151 //{

0152 //  if( mutator != null )

0153 //  {

0154 //      m_attachmentLinkMutatorChain.add( mutator );

0155 //  }

0156 //}

0157 

0158 //public void addHeadingListener( HeadingListener listener )

0159 //{

0160 //  if( listener != null )

0161 //  {

0162 //      m_headingListenerChain.add( listener );

0163 //  }

0164 //}

0165 //

0166 public void disableAccessRules()

0167 {

0168   m_parseAccessRules = false;

0169 }

0170 

0171 /**

0172 *  Use this to turn on or off image inlining.

0173 *  @param toggle If true, images are inlined (as per set in jspwiki.properties)

0174 *                If false, then images won't be inlined; instead, they will be

0175 *                treated as standard hyperlinks.

0176 *  @since 2.2.9

0177 */

0178 public void enableImageInlining( boolean toggle )

0179 {

0180   m_inlineImages = toggle;

0181 }

0182 

0183 

0184 /** 

0185 *  Return the current position in the reader stream.

0186 *  The value will be -1 prior to reading.

0187 * @return the reader position as an int.

0188 */

0189 public int getPosition()

0190 {

0191   return m_pos;

0192 }

0193 

0194 protected int nextToken()

0195   throws IOException

0196 {

0197   if( m_in == null ) return -1;

0198   m_pos++;

0199   return m_in.read();

0200 }

0201 

0202 /**

0203 *  Push back any character to the current input.  Does not

0204 *  push back a read EOF, though.

0205 */

0206 protected void pushBack( int c )

0207   throws IOException

0208 {        

0209   if( c != -1 && m_in != null )

0210   {

0211       m_pos--;

0212       m_in.unread( c );

0213   }

0214 }

0215 

0216 /**

0217 *  Cleans a Wiki name.

0218 *  <P>

0219 *  [ This is a link ] -&gt; ThisIsALink

0220 *

0221 *  @param link Link to be cleared. Null is safe, and causes this to return null.

0222 *  @return A cleaned link.

0223 *

0224 *  @since 2.0

0225 */

0226 public static String cleanLink( String link )

0227 {

0228   if( link == null ) return null;

0229 

0230   StringBuffer clean = new StringBuffer(link.length());

0231 

0232   //

0233   //  Remove non-alphanumeric characters that should not

0234   //  be put inside WikiNames.  Note that all valid

0235   //  Unicode letters are considered okay for WikiNames.

0236   //  It is the problem of the WikiPageProvider to take

0237   //  care of actually storing that information.

0238   //

0239   //  Also capitalize things, if necessary.

0240   //

0241 

0242   boolean isWord = true;  // If true, we've just crossed a word boundary

0243   

0244   for( int i = 0; i < link.length(); i++ )

0245   {

0246       char ch = link.charAt(i);

0247 

0248       if( Character.isLetterOrDigit( ch ) || PUNCTUATION_CHARS_ALLOWED.indexOf(ch) != -1 )

0249       {

0250           // Is a letter

0251           

0252           if( isWord ) ch = Character.toUpperCase( ch );

0253           clean.append( ch );

0254           isWord = false;

0255       }

0256       else

0257       {

0258           isWord = true;

0259       }

0260   }

0261 

0262   return clean.toString();

0263 }

0264   

0265   

0266   

0267 //////////////////////////END MP ///////////////////

0268   

0269   

0270   

0271   

0272   

0273   

0274   

0275   /** Name of the outlink image; relative path to the JSPWiki directory. */

0276   private static final String OUTLINK_IMAGE = "images/out.png";

0277 

0278   /**

0279    * The value for anchor element <tt>class</tt> attributes when used for wiki

0280    * page (normal) links. The value is "wikipage".

0281    */

0282   public static final String CLASS_WIKIPAGE = "wikipage";

0283 

0284   /**

0285    * The value for anchor element <tt>class</tt> attributes when used for edit

0286    * page links. The value is "editpage".

0287    */

0288   public static final String CLASS_EDITPAGE = "editpage";

0289 

0290   /**

0291    * The value for anchor element <tt>class</tt> attributes when used for

0292    * interwiki page links. The value is "interwiki".

0293    */

0294   public static final String CLASS_INTERWIKI = "interwiki";

0295 

0296   private static final int READ = 0;

0297 

0298   private static final int EDIT = 1;

0299 

0300   private static final int EMPTY = 2; // Empty message

0301 

0302   private static final int LOCAL = 3;

0303 

0304   private static final int LOCALREF = 4;

0305 

0306   private static final int IMAGE = 5;

0307 

0308   private static final int EXTERNAL = 6;

0309 

0310   private static final int INTERWIKI = 7;

0311 

0312   private static final int IMAGELINK = 8;

0313 

0314   private static final int IMAGEWIKILINK = 9;

0315 

0316   private static final int ATTACHMENT = 10;

0317 

0318   // private static final int ATTACHMENTIMAGE = 11;

0319   private static Logger log = Logger.getLogger(JSPWikiMarkupParser.class);

0320 

0321   // private boolean m_iscode = false;

0322   private boolean m_isbold = false;

0323 

0324   private boolean m_isitalic = false;

0325 

0326   private boolean m_istable = false;

0327 

0328   private boolean m_isPre = false;

0329 

0330   private boolean m_isEscaping = false;

0331 

0332   private boolean m_isdefinition = false;

0333 

0334   private boolean m_isPreBlock = false;

0335 

0336   /** Contains style information, in multiple forms. */

0337   private Stack m_styleStack = new Stack();

0338 

0339   // general list handling

0340   private int m_genlistlevel = 0;

0341 

0342   private StringBuffer m_genlistBulletBuffer = new StringBuffer(10); // stores

0343                                                                       // the #

0344                                                                       // and *

0345                                                                       // pattern

0346 

0347   private boolean m_allowPHPWikiStyleLists = true;

0348 

0349   private boolean m_isOpenParagraph = false;

0350 

0351   /** Keeps image regexp Patterns */

0352   private ArrayList m_inlineImagePatterns;

0353 

0354   private PatternMatcher m_inlineMatcher = new Perl5Matcher();

0355 

0356   /** Keeps track of any plain text that gets put in the Text nodes */

0357   private StringBuffer m_plainTextBuf = new StringBuffer(20);

0358 

0359   private Element m_currentElement;

0360 

0361   /**

0362    * This property defines the inline image pattern. Its current value is

0363    * jspwiki.translatorReader.inlinePattern

0364    */

0365   public static final String PROP_INLINEIMAGEPTRN = "jspwiki.translatorReader.inlinePattern";

0366 

0367   /** If true, consider CamelCase hyperlinks as well. */

0368   public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";

0369 

0370   /**

0371    * If true, all hyperlinks are translated as well, regardless whether they are

0372    * surrounded by brackets.

0373    */

0374   public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";

0375 

0376   /**

0377    * If true, all outward links (external links) have a small link image

0378    * appended.

0379    */

0380   public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";

0381 

0382   /**

0383    * If true, all outward attachment info links have a small link image

0384    * appended.

0385    */

0386   public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";

0387 

0388   /** If set to "true", all external links are tagged with 'rel="nofollow"' */

0389   public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";

0390 

0391   /** If true, then considers CamelCase links as well. */

0392   private boolean m_camelCaseLinks = false;

0393 

0394   /** If true, consider URIs that have no brackets as well. */

0395   // FIXME: Currently reserved, but not used.

0396   private boolean m_plainUris = false;

0397 

0398   /** If true, all outward links use a small link image. */

0399   private boolean m_useOutlinkImage = true;

0400 

0401   private boolean m_useAttachmentImage = true;

0402 

0403   /** If true, allows raw HTML. */

0404   private boolean m_allowHTML = false;

0405 

0406   private boolean m_useRelNofollow = false;

0407 

0408   private PatternCompiler m_compiler = new Perl5Compiler();

0409 

0410   static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;]+))";

0411 

0412   private PatternMatcher m_camelCaseMatcher = new Perl5Matcher();

0413 

0414   private Pattern m_camelCasePattern;

0415 

0416   private int m_rowNum = 1;

0417 

0418   /**

0419    * The default inlining pattern. Currently "*.png"

0420    */

0421   public static final String DEFAULT_INLINEPATTERN = "*.png";

0422 

0423   /**

0424    * This list contains all IANA registered URI protocol types as of September

0425    * 2004 + a few well-known extra types.

0426    * 

0427    * JSPWiki recognises all of them as external links.

0428    * 

0429    * This array is sorted during class load, so you can just dump here whatever

0430    * you want in whatever order you want.

0431    */

0432   static final String[] c_externalLinks = {"http:", "ftp:", "https:",

0433       "mailto:", "news:", "file:", "rtsp:", "mms:", "ldap:", "gopher:",

0434       "nntp:", "telnet:", "wais:", "prospero:", "z39.50s", "z39.50r", "vemmi:",

0435       "imap:", "nfs:", "acap:", "tip:", "pop:", "dav:", "opaquelocktoken:",

0436       "sip:", "sips:", "tel:", "fax:", "modem:", "soap.beep:", "soap.beeps",

0437       "xmlrpc.beep", "xmlrpc.beeps", "urn:", "go:", "h323:", "ipp:", "tftp:",

0438       "mupdate:", "pres:", "im:", "mtqp", "smb:"};

0439 

0440   /**

0441    * This Comparator is used to find an external link from c_externalLinks. It

0442    * checks whether the link starts with the array entry it is compared against.

0443    */

0444   private static Comparator c_startingComparator = new StartingComparator();

0445   static {

0446     Arrays.sort(c_externalLinks);

0447   }

0448 

/**
 * Builds a markup parser that reads from the given character source and
 * runs the one-time initialisation.
 *
 * @param in the wiki markup to parse; a null reader leaves the parser
 *          without input.
 */
public JSPWikiMarkupParser(Reader in) {
  this.setInputReader( in );
  this.initialize();
}

0456 

0457   /**

0458    * @param engine

0459    *          The WikiEngine this reader is attached to. Is used to figure out

0460    *          of a page exits.

0461    */

0462   // FIXME: parsers should be pooled for better performance.

0463   private void initialize() {

0464     PatternCompiler compiler = new GlobCompiler();

0465     ArrayList compiledpatterns = new ArrayList();

0466     Collection ptrns = getImagePatterns(/* m_engine */);

0467     //

0468     // Make them into Regexp Patterns. Unknown patterns

0469     // are ignored.

0470     //

0471     for(Iterator i = ptrns.iterator(); i.hasNext();) {

0472       try {

0473         compiledpatterns.add(compiler.compile((String)i.next()));

0474       } catch(MalformedPatternException e) {

0475         log.error("Malformed pattern in properties: ", e);

0476       }

0477     }

0478     m_inlineImagePatterns = compiledpatterns;

0479     try {

0480       m_camelCasePattern = m_compiler.compile(WIKIWORD_REGEX);

0481     } catch(MalformedPatternException e) {

0482       log.fatal("Internal error: Someone put in a faulty pattern.", e);

0483       throw new RuntimeException("Faulty camelcasepattern in TranslatorReader");

0484     }

0485     //

0486     // Set the properties.

0487     //

0488     Properties props = new Properties();

0489     m_camelCaseLinks = getBooleanProperty(props, PROP_CAMELCASELINKS,

0490             m_camelCaseLinks);

0491     m_plainUris = getBooleanProperty(props, PROP_PLAINURIS,

0492             m_plainUris);

0493     m_useOutlinkImage = getBooleanProperty(props,

0494             PROP_USEOUTLINKIMAGE, m_useOutlinkImage);

0495     m_useAttachmentImage = getBooleanProperty(props,

0496             PROP_USEATTACHMENTIMAGE, m_useAttachmentImage);

0497     m_allowHTML = getBooleanProperty(props,

0498             PROP_ALLOWHTML, m_allowHTML);

0499     m_useRelNofollow = getBooleanProperty(props, PROP_USERELNOFOLLOW,

0500             m_useRelNofollow);

0501     // if( m_engine.getUserManager().getUserDatabase() == null ||

0502     // m_engine.getAuthorizationManager() == null )

0503     // {

0504     disableAccessRules();

0505     // }

0506     // m_context.getPage().setHasMetadata();

0507   }

0508 

0509   /**

0510    * Figure out which image suffixes should be inlined.

0511    * 

0512    * @return Collection of Strings with patterns.

0513    */

0514   // FIXME: Does not belong here; should be elsewhere

0515   public static Collection getImagePatterns( /* WikiEngine engine */) {

0516     // Properties props = engine.getWikiProperties();

0517     ArrayList ptrnlist = new ArrayList();

0518     ptrnlist.add(DEFAULT_INLINEPATTERN);

0519     return ptrnlist;

0520   }

0521 

0522   /**

0523    * Returns link name, if it exists; otherwise it returns null.

0524    */

0525   private String linkExists(String page) {

0526     return page;

0527   }

0528 

0529   private Element makeLink(int type, String link, String text, String section) {

0530     Element el = null;

0531     if(text == null) text = link;

0532     // text = callMutatorChain( m_linkMutators, text );

0533     section = (section != null) ? ("#" + section) : "";

0534     // Make sure we make a link name that can be accepted

0535     // as a valid URL.

0536     if(link.length() == 0) {

0537       type = EMPTY;

0538     }

0539     switch(type){

0540       case READ:

0541         el = new Element("a").setAttribute("class", CLASS_WIKIPAGE);

0542         el.setAttribute("href", "VIEW" + link + section);

0543         el.addContent(text);

0544         break;

0545       case EDIT:

0546         el = new Element("a").setAttribute("class", CLASS_EDITPAGE);

0547         el.setAttribute("title", "Create '" + link + "'");

0548         el.setAttribute("href", "EDIT" + link);

0549         el.addContent(text);

0550         break;

0551       case EMPTY:

0552         el = new Element("u").addContent(text);

0553         break;

0554       //

0555       // These two are for local references - footnotes and

0556       // references to footnotes.

0557       // We embed the page name (or whatever WikiContext gives us)

0558       // to make sure the links are unique across Wiki.

0559       //

0560       case LOCALREF:

0561         el = new Element("a").setAttribute("class", "footnoteref");

0562         el.setAttribute("href", "#ref-" + link);

0563         el.addContent("[" + text + "]");

0564         break;

0565       case LOCAL:

0566         el = new Element("a").setAttribute("class", "footnote");

0567         el.setAttribute("name", "ref-" + link.substring(1));

0568         el.addContent("[" + text + "]");

0569         break;

0570       //

0571       // With the image, external and interwiki types we need to

0572       // make sure nobody can put in Javascript or something else

0573       // annoying into the links themselves. We do this by preventing

0574       // a haxor from stopping the link name short with quotes in

0575       // fillBuffer().

0576       //

0577       case IMAGE:

0578         el = new Element("img").setAttribute("class", "inline");

0579         el.setAttribute("src", link);

0580         el.setAttribute("alt", text);

0581         break;

0582       case IMAGELINK:

0583         el = new Element("img").setAttribute("class", "inline");

0584         el.setAttribute("src", link);

0585         el.setAttribute("alt", text);

0586         el = new Element("a").setAttribute("href", text).addContent(el);

0587         break;

0588       case IMAGEWIKILINK:

0589         String pagelink = text;

0590         el = new Element("img").setAttribute("class", "inline");

0591         el.setAttribute("src", link);

0592         el.setAttribute("alt", text);

0593         el = new Element("a").setAttribute("class", CLASS_WIKIPAGE)

0594                 .setAttribute("href", pagelink).addContent(el);

0595         break;

0596       case EXTERNAL:

0597         el = new Element("a").setAttribute("class", "external");

0598         if(m_useRelNofollow) el.setAttribute("rel", "nofollow");

0599         el.setAttribute("href", link + section);

0600         el.addContent(text);

0601         break;

0602       case INTERWIKI:

0603         el = new Element("a").setAttribute("class", CLASS_INTERWIKI);

0604         el.setAttribute("href", link + section);

0605         el.addContent(text);

0606         break;

0607       case ATTACHMENT:

0608         String attlink = "ATTACH" + link;

0609         String infolink = "INFO" + link;

0610         String imglink = "NONE" + "images/attachment_small.png";

0611         el = new Element("a").setAttribute("class", "attachment");

0612         el.setAttribute("href", attlink);

0613         el.addContent(text);

0614         pushElement(el);

0615         popElement(el.getName());

0616         if(m_useAttachmentImage) {

0617           el = new Element("img").setAttribute("src", imglink);

0618           el.setAttribute("border", "0");

0619           el.setAttribute("alt", "(info)");

0620           el = new Element("a").setAttribute("href", infolink).addContent(el);

0621         } else {

0622           el = null;

0623         }

0624         break;

0625       default:

0626         break;

0627     }

0628     if(el != null) {

0629       flushPlainText();

0630       m_currentElement.addContent(el);

0631     }

0632     return el;

0633   }

0634 

0635   /**

0636    * Figures out if a link is an off-site link. This recognizes the most common

0637    * protocols by checking how it starts.

0638    * 

0639    * @since 2.4

0640    */

0641   public static boolean isExternalLink(String link) {

0642     int idx = Arrays.binarySearch(c_externalLinks, link, c_startingComparator);

0643     //

0644     // We need to check here once again; otherwise we might

0645     // get a match for something like "h".

0646     //

0647     if(idx >= 0 && link.startsWith(c_externalLinks[idx])) return true;

0648     return false;

0649   }

0650   

0651   

0652   /**

0653    *  Gets a boolean property from a standard Properties list.

0654    *  Returns the default value, in case the key has not been set.

0655    *  <P>

0656    *  The possible values for the property are "true"/"false", "yes"/"no", or

0657    *  "on"/"off".  Any value not recognized is always defined as "false".

0658    *

0659    *  @param props   A list of properties to search.

0660    *  @param key     The property key.

0661    *  @param defval  The default value to return.

0662    *

0663    *  @return True, if the property "key" was set to "true", "on", or "yes".

0664    *

0665    *  @since 2.0.11

0666    */

0667   public static boolean getBooleanProperty( Properties props, 

0668                                             String key, 

0669                                             boolean defval )

0670   {

0671       String val = props.getProperty( key );

0672 

0673       if( val == null ) return defval;

0674 

0675       return isPositive( val );

0676   }

0677   

0678   /**

0679    *  Returns true, if the string "val" denotes a positive string.  Allowed

0680    *  values are "yes", "on", and "true".  Comparison is case-insignificant.

0681    *  Null values are safe.

0682    *

0683    *  @param val Value to check.

0684    *  @return True, if val is "true", "on", or "yes"; otherwise false.

0685    *

0686    *  @since 2.0.26

0687    */

0688   public static boolean isPositive( String val )

0689   {

0690       if( val == null ) return false;

0691 

0692       val = val.trim();

0693       

0694       return ( val.equalsIgnoreCase("true") || val.equalsIgnoreCase("on") ||

0695                val.equalsIgnoreCase("yes") );

0696   }  

0697 

0698   

0699   /**

0700    *  Returns true, if the argument contains a number, otherwise false.

0701    *  In a quick test this is roughly the same speed as Integer.parseInt()

0702    *  if the argument is a number, and roughly ten times the speed, if

0703    *  the argument is NOT a number.

0704    *  

0705    *  @since 2.4

0706    */

0707   

0708   public static boolean isNumber( String s )

0709   {

0710       if( s == null ) return false;

0711   

0712       if( s.length() > 1 && s.charAt(0) == '-' )

0713           s = s.substring(1);

0714   

0715       for( int i = 0; i < s.length(); i++ )

0716       {

0717           if( !Character.isDigit(s.charAt(i)) )

0718               return false;

0719       }

0720   

0721       return true;

0722   }

0723   /**

0724    * Returns true, if the link in question is an access rule.

0725    */

0726   private static boolean isAccessRule(String link) {

0727     return link.startsWith("{ALLOW") || link.startsWith("{DENY");

0728   }

0729 

0730   /**

0731    * Matches the given link to the list of image name patterns to determine

0732    * whether it should be treated as an inline image or not.

0733    */

0734   private boolean isImageLink(String link) {

0735     if(m_inlineImages) {

0736       for(Iterator i = m_inlineImagePatterns.iterator(); i.hasNext();) {

0737         if(m_inlineMatcher.matches(link, (Pattern)i.next())) return true;

0738       }

0739     }

0740     return false;

0741   }

0742 

0743   private static boolean isMetadata(String link) {

0744     return link.startsWith("{SET");

0745   }

0746 

0747   /**

0748    * This method peeks ahead in the stream until EOL and returns the result. It

0749    * will keep the buffers untouched.

0750    * 

0751    * @return The string from the current position to the end of line.

0752    */

0753   // FIXME: Always returns an empty line, even if the stream is full.

0754   private String peekAheadLine() throws IOException {

0755     String s = readUntilEOL().toString();

0756     if(s.length() > PUSHBACK_BUFFER_SIZE) {

0757       log

0758               .warn("Line is longer than maximum allowed size ("

0759                       + PUSHBACK_BUFFER_SIZE

0760                       + " characters.  Attempting to recover...");

0761       pushBack(s.substring(0, PUSHBACK_BUFFER_SIZE - 1));

0762     } else {

0763       try {

0764         pushBack(s);

0765       } catch(IOException e) {

0766         log

0767                 .warn("Pushback failed: the line is probably too long.  Attempting to recover.");

0768       }

0769     }

0770     return s;

0771   }

0772 

0773   /**

0774    * Writes HTML for error message.

0775    */

0776   public static Element makeError(String error) {

0777     return new Element("span").setAttribute("class", "error").addContent(error);

0778   }

0779 

0780   private int flushPlainText() {

0781     int numChars = m_plainTextBuf.length();

0782     if(numChars > 0) {

0783       String buf;

0784       if(!m_allowHTML) {

0785         buf = escapeHTMLEntities(m_plainTextBuf);

0786       } else {

0787         buf = m_plainTextBuf.toString();

0788       }

0789       //

0790       // We must first empty the buffer because the side effect of

0791       // calling makeCamelCaseLink() is to call this routine.

0792       //

0793       m_plainTextBuf = new StringBuffer(20);

0794       try {

0795         //

0796         // This is the heaviest part of parsing, and therefore we can

0797         // do some optimization here.

0798         //

0799         // 1) Only when the length of the buffer is big enough, we try to do the

0800         // match

0801         //

0802         if(m_camelCaseLinks && !m_isEscaping && buf.length() > 3) {

0803           // System.out.println("Buffer="+buf);

0804           while(m_camelCaseMatcher.contains(buf, m_camelCasePattern)) {

0805             MatchResult result = m_camelCaseMatcher.getMatch();

0806             String firstPart = buf.substring(0, result.beginOffset(0));

0807             String prefix = result.group(1);

0808             if(prefix == null) prefix = "";

0809             String camelCase = result.group(2);

0810             String protocol = result.group(3);

0811             String uri = protocol + result.group(4);

0812             buf = buf.substring(result.endOffset(0));

0813             m_currentElement.addContent(firstPart);

0814             //

0815             // Check if the user does not wish to do URL or WikiWord expansion

0816             //

0817             if(prefix.endsWith("~") || prefix.indexOf('[') != -1) {

0818               if(prefix.endsWith("~"))

0819                 prefix = prefix.substring(0, prefix.length() - 1);

0820               if(camelCase != null) {

0821                 m_currentElement.addContent(prefix + camelCase);

0822               } else if(protocol != null) {

0823                 m_currentElement.addContent(prefix + uri);

0824               }

0825               continue;

0826             }

0827             //

0828             // Fine, then let's check what kind of a link this was

0829             // and emit the proper elements

0830             //

0831             if(protocol != null) {

0832               char c = uri.charAt(uri.length() - 1);

0833               if(c == '.' || c == ',') {

0834                 uri = uri.substring(0, uri.length() - 1);

0835                 buf = c + buf;

0836               }

0837               // System.out.println("URI match "+uri);

0838               m_currentElement.addContent(prefix);

0839               makeDirectURILink(uri);

0840             } else {

0841               // System.out.println("Matched: '"+camelCase+"'");

0842               // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");

0843               // System.out.println("prefix="+prefix);

0844               m_currentElement.addContent(prefix);

0845               makeCamelCaseLink(camelCase);

0846             }

0847           }

0848           m_currentElement.addContent(buf);

0849         } else {

0850           //

0851           // No camelcase asked for, just add the elements

0852           //

0853           m_currentElement.addContent(buf);

0854         }

0855       } catch(IllegalDataException e) {

0856         //

0857         // Sometimes it's possible that illegal XML chars is added to the data.

0858         // Here we make sure it does not stop parsing.

0859         //

0860         m_currentElement.addContent(makeError(e.getMessage()));

0861       }

0862     }

0863     return numChars;

0864   }

0865 

0866   /**

0867    * Escapes XML entities in a HTML-compatible way (i.e. does not escape

0868    * entities that are already escaped).

0869    * 

0870    * @param buf

0871    * @return

0872    */

0873   private String escapeHTMLEntities(StringBuffer buf) {

0874     StringBuffer tmpBuf = new StringBuffer(buf.length() + 20);

0875     for(int i = 0; i < buf.length(); i++) {

0876       char ch = buf.charAt(i);

0877       if(ch == '<') {

0878         tmpBuf.append("&lt;");

0879       } else if(ch == '>') {

0880         tmpBuf.append("&gt;");

0881       } else if(ch == '&') {

0882         for(int j = (i < buf.length() - 1) ? i + 1 : i; j < buf.length(); j++) {

0883           int ch2 = buf.charAt(j);

0884           if(ch2 == ';') {

0885             tmpBuf.append(ch);

0886             break;

0887           }

0888           if(ch2 != '#' && !Character.isLetterOrDigit((char)ch2)) {

0889             tmpBuf.append("&amp;");

0890             break;

0891           }

0892         }

0893       } else {

0894         tmpBuf.append(ch);

0895       }

0896     }

0897     return tmpBuf.toString();

0898   }

0899 

0900   private Element pushElement(Element e) {

0901     flushPlainText();

0902     m_currentElement.addContent(e);

0903     m_currentElement = e;

0904     return e;

0905   }

0906 

0907   private Element addElement(Content e) {

0908     if(e != null) {

0909       flushPlainText();

0910       m_currentElement.addContent(e);

0911     }

0912     return m_currentElement;

0913   }

0914 

  /**
   * All elements that can be empty by the HTML DTD. popElement() consults this
   * table to decide whether an element that received no text may be left
   * without any content.
   */
  // Keep sorted: popElement() searches this array with Arrays.binarySearch(),
  // which only gives correct answers on a sorted array.
  private static final String[] EMPTY_ELEMENTS = {"area", "base", "br", "col",
      "hr", "img", "input", "link", "meta", "p", "param"};

0921 

0922   private Element popElement(String s) {

0923     int flushedBytes = flushPlainText();

0924     Element currEl = m_currentElement;

0925     while(currEl.getParentElement() != null) {

0926       if(currEl.getName().equals(s) && !currEl.isRootElement()) {

0927         m_currentElement = currEl.getParentElement();

0928         //

0929         // Check if it's okay for this element to be empty. Then we will

0930         // trick the JDOM generator into not generating an empty element,

0931         // by putting an empty string between the tags. Yes, it's a kludge

0932         // but what'cha gonna do about it. :-)

0933         //

0934         if(flushedBytes == 0 && Arrays.binarySearch(EMPTY_ELEMENTS, s) < 0) {

0935           currEl.addContent("");

0936         }

0937         return m_currentElement;

0938       }

0939       currEl = currEl.getParentElement();

0940     }

0941     return m_currentElement;

0942   }

0943 

0944   /**

0945    * Reads the stream until it meets one of the specified ending characters, or

0946    * stream end. The ending character will be left in the stream.

0947    */

0948   private String readUntil(String endChars) throws IOException {

0949     StringBuffer sb = new StringBuffer(80);

0950     int ch = nextToken();

0951     while(ch != -1) {

0952       if(ch == '\\') {

0953         ch = nextToken();

0954         if(ch == -1) {

0955           break;

0956         }

0957       } else {

0958         if(endChars.indexOf((char)ch) != -1) {

0959           pushBack(ch);

0960           break;

0961         }

0962       }

0963       sb.append((char)ch);

0964       ch = nextToken();

0965     }

0966     return sb.toString();

0967   }

0968 

0969   /**

0970    * Reads the stream while the characters that have been specified are in the

0971    * stream, returning then the result as a String.

0972    */

0973   private String readWhile(String endChars) throws IOException {

0974     StringBuffer sb = new StringBuffer(80);

0975     int ch = nextToken();

0976     while(ch != -1) {

0977       if(endChars.indexOf((char)ch) == -1) {

0978         pushBack(ch);

0979         break;

0980       }

0981       sb.append((char)ch);

0982       ch = nextToken();

0983     }

0984     return sb.toString();

0985   }

0986 

0987   private JSPWikiMarkupParser m_cleanTranslator;

0988 

0989   /**

0990    * Does a lazy init. Otherwise, we would get into a situation where

0991    * HTMLRenderer would try and boot a TranslatorReader before the

0992    * TranslatorReader it is contained by is up.

0993    */

0994   private JSPWikiMarkupParser getCleanTranslator() {

0995     if(m_cleanTranslator == null) {

0996       m_cleanTranslator = new JSPWikiMarkupParser(null);

0997       m_cleanTranslator.m_allowHTML = true;

0998     }

0999     return m_cleanTranslator;

1000   }

1001 

1002   /**

1003    * Modifies the "hd" parameter to contain proper values. Because an "id" tag

1004    * may only contain [a-zA-Z0-9:_-], we'll replace the % after url encoding

1005    * with '_'.

1006    */

1007   // FIXME: This method should probably be public and in an util class somewhere

1008   private String makeHeadingAnchor(String baseName, String title, Heading hd) {

1009     hd.m_titleText = title;

1010     title = cleanLink(title);

1011     hd.m_titleSection = title;

1012     hd.m_titleAnchor = "section-" + baseName + "-" + hd.m_titleSection;

1013     hd.m_titleAnchor = hd.m_titleAnchor.replace('%', '_');

1014     hd.m_titleAnchor = hd.m_titleAnchor.replace('/', '_');

1015     return hd.m_titleAnchor;

1016   }

1017 

1018   private String makeSectionTitle(String title) {

1019     title = title.trim();

1020     String outTitle;

1021     try {

1022       JSPWikiMarkupParser dtr = getCleanTranslator();

1023       dtr.setInputReader(new StringReader(title));

1024       CleanTextRenderer ctt = new CleanTextRenderer(/* m_context, */dtr.parse());

1025       outTitle = ctt.getString();

1026     } catch(IOException e) {

1027       log.fatal("CleanTranslator not working", e);

1028       throw new RuntimeException(

1029               "CleanTranslator not working as expected, when cleaning title"

1030                       + e.getMessage());

1031     }

1032     return outTitle;

1033   }

1034 

1035   /**

1036    * Returns XHTML for the start of the heading. Also sets the line-end emitter.

1037    * 

1038    * @param level

1039    * @param title

1040    *          the title for the heading

1041    * @param hd

1042    *          a List to which heading should be added

1043    */

1044   public Element makeHeading(int level, String pageName, String title,

1045           Heading hd) {

1046     Element el = null;

1047     String outTitle = makeSectionTitle(title);

1048     hd.m_level = level;

1049     switch(level){

1050       case Heading.HEADING_SMALL:

1051         el = new Element("h4").setAttribute("id", makeHeadingAnchor(pageName,

1052                 outTitle, hd));

1053         break;

1054       case Heading.HEADING_MEDIUM:

1055         el = new Element("h3").setAttribute("id", makeHeadingAnchor(pageName,

1056                 outTitle, hd));

1057         break;

1058       case Heading.HEADING_LARGE:

1059         el = new Element("h2").setAttribute("id", makeHeadingAnchor(pageName,

1060                 outTitle, hd));

1061         break;

1062     }

1063     return el;

1064   }

1065 

1066   /**

1067    * When given a link to a WikiName, we just return a proper HTML link for it.

1068    * The local link mutator chain is also called.

1069    */

1070   private Element makeCamelCaseLink(String wikiname) {

1071     String matchedLink;

1072     // callMutatorChain( m_localLinkMutatorChain, wikiname );

1073     if((matchedLink = linkExists(wikiname)) != null) {

1074       makeLink(READ, matchedLink, wikiname, null);

1075     } else {

1076       makeLink(EDIT, wikiname, wikiname, null);

1077     }

1078     return m_currentElement;

1079   }

1080 

1081   /** Holds the image URL for the duration of this parser */

1082   private String m_outlinkImageURL = null;

1083 

1084   /**

1085    * Returns an element for the external link image (out.png). However, this

1086    * method caches the URL for the lifetime of this MarkupParser, because it's

1087    * commonly used, and we'll end up with possibly hundreds our thousands of

1088    * references to it... It's a lot faster, too.

1089    * 

1090    * @return An element containing the HTML for the outlink image.

1091    */

1092   private Element outlinkImage() {

1093     Element el = null;

1094     if(m_useOutlinkImage) {

1095       if(m_outlinkImageURL == null) {

1096         m_outlinkImageURL = OUTLINK_IMAGE;

1097       }

1098       el = new Element("img").setAttribute("class", "outlink");

1099       el.setAttribute("src", m_outlinkImageURL);

1100       el.setAttribute("alt", "");

1101     }

1102     return el;

1103   }

1104 

1105   /**

1106    * Takes an URL and turns it into a regular wiki link. Unfortunately, because

1107    * of the way that flushPlainText() works, it already encodes all of the XML

1108    * entities. But so does WikiContext.getURL(), so we have to do a

1109    * reverse-replace here, so that it can again be replaced in makeLink.

1110    * <p>

1111    * What a crappy problem.

1112    * 

1113    * @param url

1114    * @return

1115    */

1116   private Element makeDirectURILink(String url) {

1117     Element result;

1118     String last = null;

1119     if(url.endsWith(",") || url.endsWith(".")) {

1120       last = url.substring(url.length() - 1);

1121       url = url.substring(0, url.length() - 1);

1122     }

1123     // callMutatorChain( m_externalLinkMutatorChain, url );

1124     if(isImageLink(url)) {

1125       result = handleImageLink(StringUtils.replace(url, "&amp;", "&"), url,

1126               false);

1127     } else {

1128       result = makeLink(EXTERNAL, StringUtils.replace(url, "&amp;", "&"), url,

1129               null);

1130       addElement(outlinkImage());

1131     }

1132     if(last != null) {

1133       m_plainTextBuf.append(last);

1134     }

1135     return result;

1136   }

1137 

1138   /**

1139    * Image links are handled differently: 1. If the text is a WikiName of an

1140    * existing page, it gets linked. 2. If the text is an external link, then it

1141    * is inlined. 3. Otherwise it becomes an ALT text.

1142    * 

1143    * @param reallink

1144    *          The link to the image.

1145    * @param link

1146    *          Link text portion, may be a link to somewhere else.

1147    * @param hasLinkText

1148    *          If true, then the defined link had a link text available. This

1149    *          means that the link text may be a link to a wiki page, or an

1150    *          external resource.

1151    */

1152   // FIXME: isExternalLink() is called twice.

1153   private Element handleImageLink(String reallink, String link,

1154           boolean hasLinkText) {

1155     String possiblePage = cleanLink(link);

1156     if(isExternalLink(link) && hasLinkText) {

1157       return makeLink(IMAGELINK, reallink, link, null);

1158     } else if((linkExists(possiblePage)) != null && hasLinkText) {

1159       // System.out.println("Orig="+link+", Matched: "+matchedLink);

1160       // callMutatorChain( m_localLinkMutatorChain, possiblePage );

1161       return makeLink(IMAGEWIKILINK, reallink, link, null);

1162     } else {

1163       return makeLink(IMAGE, reallink, link, null);

1164     }

1165   }

1166 

1167   // private Element handleAccessRule( String ruleLine )

1168   // {

1169   // if( !m_parseAccessRules ) return m_currentElement;

1170   // Acl acl;

1171   // WikiPage page = m_context.getPage();

1172   // // UserDatabase db = m_context.getEngine().getUserDatabase();

1173   //

1174   // if( ruleLine.startsWith( "{" ) )

1175   // ruleLine = ruleLine.substring( 1 );

1176   // if( ruleLine.endsWith( "}" ) )

1177   // ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );

1178   //

1179   // log.debug("page="+page.getName()+", ACL = "+ruleLine);

1180   //        

1181   // try

1182   // {

1183   // acl = m_engine.getAclManager().parseAcl( page, ruleLine );

1184   //

1185   // page.setAcl( acl );

1186   //

1187   // log.debug( acl.toString() );

1188   // }

1189   // catch( WikiSecurityException wse )

1190   // {

1191   // return makeError( wse.getMessage() );

1192   // }

1193   //

1194   // return m_currentElement;

1195   // }

1196   /**

1197    * Handles metadata setting [{SET foo=bar}]

1198    */

1199   private Element handleMetadata(String link) {

1200     try {

1201       String args = link.substring(link.indexOf(' '), link.length() - 1);

1202       String name = args.substring(0, args.indexOf('='));

1203       String val = args.substring(args.indexOf('=') + 1, args.length());

1204       name = name.trim();

1205       val = val.trim();

1206       if(val.startsWith("'")) val = val.substring(1);

1207       if(val.endsWith("'")) val = val.substring(0, val.length() - 1);

1208       // log.debug("SET name='"+name+"', value='"+val+"'.");

1209       if(name.length() > 0 && val.length() > 0) {

1210         // val = m_engine.getVariableManager().expandVariables( m_context,

1211         // val );

1212         //            

1213         // m_context.getPage().setAttribute( name, val );

1214       }

1215     } catch(Exception e) {

1216       return makeError(" Invalid SET found: " + link);

1217     }

1218     return m_currentElement;

1219   }

1220 

1221   /**

1222    * Emits a processing instruction that will disable markup escaping. This is

1223    * very useful if you want to emit HTML directly into the stream.

1224    * 

1225    */

1226   private void disableOutputEscaping() {

1227     addElement(new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, ""));

1228   }

1229 

1230   /**

1231    * Gobbles up all hyperlinks that are encased in square brackets.

1232    */

1233   private Element handleHyperlinks(String link, int pos) {

1234     StringBuffer sb = new StringBuffer(link.length() + 80);

1235     String reallink;

1236     int cutpoint;

1237     if(isAccessRule(link)) {

1238       // return handleAccessRule( link );

1239       return null;

1240     }

1241     if(isMetadata(link)) { return handleMetadata(link); }

1242     // if( PluginManager.isPluginLink( link ) )

1243     // {

1244     // try

1245     // {

1246     // Content pluginContent = m_engine.getPluginManager().parsePluginLine(

1247     // m_context, link, pos );

1248     //

1249     // addElement( pluginContent );

1250     // }

1251     // catch( PluginException e )

1252     // {

1253     // log.info( "Failed to insert plugin", e );

1254     // log.info( "Root cause:",e.getRootThrowable() );

1255     // return addElement( makeError("Plugin insertion failed: "+e.getMessage())

1256     // );

1257     // }

1258     //            

1259     // return m_currentElement;

1260     // }

1261     // link = TextUtil.replaceEntities( link );

1262     if((cutpoint = link.indexOf('|')) != -1) {

1263       reallink = link.substring(cutpoint + 1).trim();

1264       link = link.substring(0, cutpoint);

1265     } else {

1266       reallink = link.trim();

1267     }

1268     int interwikipoint = -1;

1269     //

1270     // Yes, we now have the components separated.

1271     // link = the text the link should have

1272     // reallink = the url or page name.

1273     //

1274     // In many cases these are the same. [link|reallink].

1275     //  

1276     // if( VariableManager.isVariableLink( link ) )

1277     // {

1278     // Content el = new VariableContent(link);

1279     //

1280     // addElement( el );

1281     // }

1282     // else

1283     if(isExternalLink(reallink)) {

1284       // It's an external link, out of this Wiki

1285       // callMutatorChain( m_externalLinkMutatorChain, reallink );

1286       if(isImageLink(reallink)) {

1287         handleImageLink(reallink, link, (cutpoint != -1));

1288       } else {

1289         makeLink(EXTERNAL, reallink, link, null);

1290         addElement(outlinkImage());

1291       }

1292     }

1293     // else if( (interwikipoint = reallink.indexOf(":")) != -1 )

1294     // {

1295     // // It's an interwiki link

1296     // // InterWiki links also get added to external link chain

1297     // // after the links have been resolved.

1298     //            

1299     // // FIXME: There is an interesting issue here: We probably should

1300     // // URLEncode the wikiPage, but we can't since some of the

1301     // // Wikis use slashes (/), which won't survive URLEncoding.

1302     // // Besides, we don't know which character set the other Wiki

1303     // // is using, so you'll have to write the entire name as it appears

1304     // // in the URL. Bugger.

1305     //            

1306     // String extWiki = reallink.substring( 0, interwikipoint );

1307     // String wikiPage = reallink.substring( interwikipoint+1 );

1308     //

1309     // String urlReference = m_engine.getInterWikiURL( extWiki );

1310     //

1311     // if( urlReference != null )

1312     // {

1313     // urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );

1314     // callMutatorChain( m_externalLinkMutatorChain, urlReference );

1315     //

1316     // if( isImageLink(urlReference) )

1317     // {

1318     // handleImageLink( urlReference, link, cutpoint != -1 );

1319     // }

1320     // else

1321     // {

1322     // makeLink( INTERWIKI, urlReference, link, null );

1323     // }

1324     //                

1325     // if( isExternalLink(urlReference) )

1326     // {

1327     // addElement( outlinkImage() );

1328     // }

1329     // }

1330     // else

1331     // {

1332     // addElement( makeError("No InterWiki reference defined in properties for

1333     // Wiki called '"+extWiki+"'!)") );

1334     // }

1335     // }

1336     else if(reallink.startsWith("#")) {

1337       // It defines a local footnote

1338       makeLink(LOCAL, reallink, link, null);

1339     } else if(isNumber(reallink)) {

1340       // It defines a reference to a local footnote

1341       makeLink(LOCALREF, reallink, link, null);

1342     } else {

1343       int hashMark = -1;

1344       //

1345       // Internal wiki link, but is it an attachment link?

1346       //

1347       String attachment = findAttachment(reallink);

1348       if(attachment != null) {

1349         // callMutatorChain( m_attachmentLinkMutatorChain, attachment );

1350         if(isImageLink(reallink)) {

1351           attachment = "ATTACH" + attachment;

1352           sb.append(handleImageLink(attachment, link, (cutpoint != -1)));

1353         } else {

1354           makeLink(ATTACHMENT, attachment, link, null);

1355         }

1356       } else if((hashMark = reallink.indexOf('#')) != -1) {

1357         // It's an internal Wiki link, but to a named section

1358         String namedSection = reallink.substring(hashMark + 1);

1359         reallink = reallink.substring(0, hashMark);

1360         reallink = cleanLink(reallink);

1361         // callMutatorChain( m_localLinkMutatorChain, reallink );

1362         String matchedLink;

1363         if((matchedLink = linkExists(reallink)) != null) {

1364           String sectref = "section-" + namedSection;

1365           sectref = sectref.replace('%', '_');

1366           makeLink(READ, matchedLink, link, sectref);

1367         } else {

1368           makeLink(EDIT, reallink, link, null);

1369         }

1370       } else {

1371         // It's an internal Wiki link

1372         reallink = cleanLink(reallink);

1373         // callMutatorChain( m_localLinkMutatorChain, reallink );

1374         String matchedLink = linkExists(reallink);

1375         if(matchedLink != null) {

1376           makeLink(READ, matchedLink, link, null);

1377         } else {

1378           makeLink(EDIT, reallink, link, null);

1379         }

1380       }

1381     }

1382     return m_currentElement;

1383   }

1384 

  /**
   * Looks up an attachment for the given link. In this version the lookup is
   * disabled (the original implementation is kept below as a comment), so the
   * method always returns null and handleHyperlinks() never takes its
   * attachment branch.
   * 
   * @param link
   *          the link target to look up; currently ignored
   * @return always null
   */
  private String findAttachment(String link) {
    // AttachmentManager mgr = m_engine.getAttachmentManager();
    // Attachment att = null;
    //
    // try
    // {
    // att = mgr.getAttachmentInfo( m_context, link );
    // }
    // catch( ProviderException e )
    // {
    // log.warn("Finding attachments failed: ",e);
    // return null;
    // }
    //
    // if( att != null )
    // {
    // return att.getName();
    // }
    // else if( link.indexOf('/') != -1 )
    // {
    // return link;
    // }
    //
    return null;
  }

1410 

1411   /**

1412    * Pushes back any string that has been read. It will obviously be pushed back

1413    * in a reverse order.

1414    * 

1415    * @since 2.1.77

1416    */

1417   private void pushBack(String s) throws IOException {

1418     for(int i = s.length() - 1; i >= 0; i--) {

1419       pushBack(s.charAt(i));

1420     }

1421   }

1422 

1423   private Element handleBackslash() throws IOException {

1424     int ch = nextToken();

1425     if(ch == '\\') {

1426       int ch2 = nextToken();

1427       if(ch2 == '\\') {

1428         pushElement(new Element("br").setAttribute("clear", "all"));

1429         return popElement("br");

1430       }

1431       pushBack(ch2);

1432       pushElement(new Element("br"));

1433       return popElement("br");

1434     }

1435     pushBack(ch);

1436     return null;

1437   }

1438 

1439   private Element handleUnderscore() throws IOException {

1440     int ch = nextToken();

1441     Element el = null;

1442     if(ch == '_') {

1443       if(m_isbold) {

1444         el = popElement("b");

1445       } else {

1446         el = pushElement(new Element("b"));

1447       }

1448       m_isbold = !m_isbold;

1449     } else {

1450       pushBack(ch);

1451     }

1452     return el;

1453   }

1454 

1455   /**

1456    * For example: italics.

1457    */

1458   private Element handleApostrophe() throws IOException {

1459     int ch = nextToken();

1460     Element el = null;

1461     if(ch == '\'') {

1462       if(m_isitalic) {

1463         el = popElement("i");

1464       } else {

1465         el = pushElement(new Element("i"));

1466       }

1467       m_isitalic = !m_isitalic;

1468     } else {

1469       pushBack(ch);

1470     }

1471     return el;

1472   }

1473 

1474   private Element handleOpenbrace(boolean isBlock) throws IOException {

1475     int ch = nextToken();

1476     if(ch == '{') {

1477       int ch2 = nextToken();

1478       if(ch2 == '{') {

1479         m_isPre = true;

1480         m_isEscaping = true;

1481         m_isPreBlock = isBlock;

1482         if(isBlock) {

1483           startBlockLevel();

1484           return pushElement(new Element("pre"));

1485         }

1486         return pushElement(new Element("span").setAttribute("style",

1487                 "font-family:monospace; white-space:pre;"));

1488       }

1489       pushBack(ch2);

1490       return pushElement(new Element("tt"));

1491     }

1492     pushBack(ch);

1493     return null;

1494   }

1495 

1496   /**

1497    * Handles both }} and }}}

1498    */

1499   private Element handleClosebrace() throws IOException {

1500     int ch2 = nextToken();

1501     if(ch2 == '}') {

1502       int ch3 = nextToken();

1503       if(ch3 == '}') {

1504         if(m_isPre) {

1505           if(m_isPreBlock) {

1506             popElement("pre");

1507           } else {

1508             popElement("span");

1509           }

1510           m_isPre = false;

1511           m_isEscaping = false;

1512           return m_currentElement;

1513         }

1514         m_plainTextBuf.append("}}}");

1515         return m_currentElement;

1516       }

1517       pushBack(ch3);

1518       if(!m_isEscaping) { return popElement("tt"); }

1519     }

1520     pushBack(ch2);

1521     return null;

1522   }

1523 

1524   private Element handleDash() throws IOException {

1525     int ch = nextToken();

1526     if(ch == '-') {

1527       int ch2 = nextToken();

1528       if(ch2 == '-') {

1529         int ch3 = nextToken();

1530         if(ch3 == '-') {

1531           // Empty away all the rest of the dashes.

1532           // Do not forget to return the first non-match back.

1533           while((ch = nextToken()) == '-')

1534             ;

1535           pushBack(ch);

1536           startBlockLevel();

1537           pushElement(new Element("hr"));

1538           return popElement("hr");

1539         }

1540         pushBack(ch3);

1541       }

1542       pushBack(ch2);

1543     }

1544     pushBack(ch);

1545     return null;

1546   }

1547 

1548   private Element handleHeading(String pageName) throws IOException {

1549     Element el = null;

1550     int ch = nextToken();

1551     Heading hd = new Heading();

1552     if(ch == '!') {

1553       int ch2 = nextToken();

1554       if(ch2 == '!') {

1555         String title = peekAheadLine();

1556         el = makeHeading(Heading.HEADING_LARGE, pageName, title, hd);

1557       } else {

1558         pushBack(ch2);

1559         String title = peekAheadLine();

1560         el = makeHeading(Heading.HEADING_MEDIUM, pageName, title, hd);

1561       }

1562     } else {

1563       pushBack(ch);

1564       String title = peekAheadLine();

1565       el = makeHeading(Heading.HEADING_SMALL, pageName, title, hd);

1566     }

1567     // callHeadingListenerChain( hd );

1568     if(el != null) pushElement(el);

1569     return el;

1570   }

1571 

1572   /**

1573    * Reads the stream until the next EOL or EOF. Note that it will also read the

1574    * EOL from the stream.

1575    */

1576   private StringBuffer readUntilEOL() throws IOException {

1577     int ch;

1578     StringBuffer buf = new StringBuffer(256);

1579     while(true) {

1580       ch = nextToken();

1581       if(ch == -1) break;

1582       buf.append((char)ch);

1583       if(ch == '\n') break;

1584     }

1585     return buf;

1586   }

1587 

1588   /** Controls whether italic is restarted after a paragraph shift */

1589   private boolean m_restartitalic = false;

1590 

1591   private boolean m_restartbold = false;

1592 

1593   /**

1594    * Starts a block level element, therefore closing a potential open paragraph

1595    * tag.

1596    */

1597   private void startBlockLevel() {

1598     // These may not continue over block level limits in XHTML

1599     popElement("i");

1600     popElement("b");

1601     popElement("tt");

1602     if(m_isOpenParagraph) {

1603       m_isOpenParagraph = false;

1604       popElement("p");

1605       m_plainTextBuf.append("\n"); // Just small beautification

1606     }

1607     m_restartitalic = m_isitalic;

1608     m_restartbold = m_isbold;

1609     m_isitalic = false;

1610     m_isbold = false;

1611   }

1612 

1613   private static String getListType(char c) {

1614     if(c == '*') {

1615       return "ul";

1616     } else if(c == '#') { return "ol"; }

1617     throw new RuntimeException("Parser got faulty list type: " + c);

1618   }

1619 

  /**
   * Like original handleOrderedList() and handleUnorderedList() however handles
   * both ordered ('#') and unordered ('*') mixed together.
   * <p>
   * Reads the run of bullet characters at the start of a list line and brings
   * the open list elements in line with it: lists are opened when the run is
   * longer than the current nesting level, closed when it is shorter, and
   * unwound and reopened when the bullet types differ from those of the
   * previous list line. Finally a new <code>li</code> element is opened and
   * the bullet run is remembered in <code>m_genlistBulletBuffer</code> for the
   * next list line. <code>m_genlistlevel</code> holds the number of list
   * levels currently open.
   * 
   * @return the current element, i.e. the newly opened list item
   */
  // FIXME: Refactor this; it's a bit messy.
  private Element handleGeneralList() throws IOException {
    startBlockLevel();
    // The bullet run of this line, e.g. "*", "**#".
    String strBullets = readWhile("*#");
    // NOTE: numBullets is the length of the run as read; it is not recomputed
    // when strBullets is rewritten by the PHPWiki substitution below.
    int numBullets = strBullets.length();
    // override the beginning portion of bullet pattern to be like the previous
    // to simulate PHPWiki style lists
    if(m_allowPHPWikiStyleLists) {
      // only substitute if different
      if(!(strBullets.substring(0, Math.min(numBullets, m_genlistlevel))
              .equals(m_genlistBulletBuffer.substring(0, Math.min(numBullets,
                      m_genlistlevel))))) {
        if(numBullets <= m_genlistlevel) {
          // Substitute all but the last character (keep the expressed bullet
          // preference)
          strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0,
                  numBullets - 1) : "")
                  + strBullets.substring(numBullets - 1, numBullets);
        } else {
          // Deeper than before: reuse the whole previous pattern and keep only
          // the additional bullets of this line.
          strBullets = m_genlistBulletBuffer
                  + strBullets.substring(m_genlistlevel, numBullets);
        }
      }
    }
    //
    // Check if this is still of the same type, i.e. the bullets agree with the
    // previous line over the levels both lines have in common
    //
    if(strBullets.substring(0, Math.min(numBullets, m_genlistlevel)).equals(
            m_genlistBulletBuffer.substring(0, Math.min(numBullets,
                    m_genlistlevel)))) {
      if(numBullets > m_genlistlevel) {
        // Deeper nesting: open the first new list directly inside the current
        // element ...
        pushElement(new Element(
                getListType(strBullets.charAt(m_genlistlevel++))));
        // ... and wrap every further level in its own list item.
        for(; m_genlistlevel < numBullets; m_genlistlevel++) {
          // bullets are growing, get from new bullet list
          pushElement(new Element("li"));
          pushElement(new Element(
                  getListType(strBullets.charAt(m_genlistlevel))));
        }
      } else if(numBullets < m_genlistlevel) {
        // Shallower nesting: close the previous list item.
        popElement("li");
        for(; m_genlistlevel > numBullets; m_genlistlevel--) {
          // bullets are shrinking, get from old bullet list
          popElement(getListType(m_genlistBulletBuffer
                  .charAt(m_genlistlevel - 1)));
          // Always true inside this loop (m_genlistlevel > numBullets >= 0).
          if(m_genlistlevel > 0) {
            popElement("li");
          }
        }
      } else {
        // Same depth: just close the previous item, if a list is open at all.
        if(m_genlistlevel > 0) {
          popElement("li");
        }
      }
    } else {
      //
      // The pattern has changed, unwind and restart
      //
      int numEqualBullets;
      int numCheckBullets;
      // find out how much is the same
      numEqualBullets = 0;
      numCheckBullets = Math.min(numBullets, m_genlistlevel);
      while(numEqualBullets < numCheckBullets) {
        // if the bullets are equal so far, keep going
        if(strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer
                .charAt(numEqualBullets))
          numEqualBullets++;
        // otherwise giveup, we have found how many are equal
        else break;
      }
      // unwind: close every level below the common prefix
      for(; m_genlistlevel > numEqualBullets; m_genlistlevel--) {
        popElement(getListType(m_genlistBulletBuffer.charAt(m_genlistlevel - 1)));
        // Always true inside this loop (m_genlistlevel > numEqualBullets >= 0).
        if(m_genlistlevel > 0) {
          popElement("li");
        }
      }
      // rewind: reopen lists according to the new bullet pattern
      pushElement(new Element(getListType(strBullets.charAt(numEqualBullets++))));
      for(int i = numEqualBullets; i < numBullets; i++) {
        pushElement(new Element("li"));
        pushElement(new Element(getListType(strBullets.charAt(i))));
      }
      m_genlistlevel = numBullets;
    }
    // Open the list item that will hold the text of this line.
    pushElement(new Element("li"));
    // work done, remember the new bullet list (in place of old one)
    m_genlistBulletBuffer.setLength(0);
    m_genlistBulletBuffer.append(strBullets);
    return m_currentElement;
  }

1738 

1739   private Element unwindGeneralList() {

1740     // unwind

1741     for(; m_genlistlevel > 0; m_genlistlevel--) {

1742       popElement("li");

1743       popElement(getListType(m_genlistBulletBuffer.charAt(m_genlistlevel - 1)));

1744     }

1745     m_genlistBulletBuffer.setLength(0);

1746     return null;

1747   }

1748 

1749   private Element handleDefinitionList() throws IOException {

1750     if(!m_isdefinition) {

1751       m_isdefinition = true;

1752       startBlockLevel();

1753       pushElement(new Element("dl"));

1754       return pushElement(new Element("dt"));

1755     }

1756     return null;

1757   }

1758 

1759   private Element handleOpenbracket() throws IOException {

1760     StringBuffer sb = new StringBuffer(40);

1761     int pos = getPosition();

1762     int ch;

1763     boolean isPlugin = false;

1764     while((ch = nextToken()) == '[') {

1765       sb.append((char)ch);

1766     }

1767     if(ch == '{') {

1768       isPlugin = true;

1769     }

1770     pushBack(ch);

1771     if(sb.length() > 0) {

1772       m_plainTextBuf.append(sb);

1773       return m_currentElement;

1774     }

1775     //

1776     // Find end of hyperlink

1777     //

1778     ch = nextToken();

1779     int nesting = 1; // Check for nested plugins

1780     while(ch != -1) {

1781       int ch2 = nextToken();

1782       pushBack(ch2);

1783       if(isPlugin) {

1784         if(ch == '[' && ch2 == '{') {

1785           nesting++;

1786         } else if(nesting == 0 && ch == ']'

1787                 && sb.charAt(sb.length() - 1) == '}') {

1788           break;

1789         } else if(ch == '}' && ch2 == ']') {

1790           // NB: This will be decremented once at the end

1791           nesting--;

1792         }

1793       } else {

1794         if(ch == ']') {

1795           break;

1796         }

1797       }

1798       sb.append((char)ch);

1799       ch = nextToken();

1800     }

1801     //

1802     // If the link is never finished, do some tricks to display the rest of the

1803     // line

1804     // unchanged.

1805     //

1806     if(ch == -1) {

1807       log.debug("Warning: unterminated link detected!");

1808       m_isEscaping = true;

1809       m_plainTextBuf.append(sb);

1810       flushPlainText();

1811       m_isEscaping = false;

1812       return m_currentElement;

1813     }

1814     return handleHyperlinks(sb.toString(), pos);

1815   }

1816 

1817   /**

1818    * Reads the stream until the current brace is closed or stream end.

1819    */

1820   private String readBraceContent(char opening, char closing)

1821           throws IOException {

1822     StringBuffer sb = new StringBuffer(40);

1823     int braceLevel = 1;

1824     int ch;

1825     while((ch = nextToken()) != -1) {

1826       if(ch == '\\') {

1827         continue;

1828       } else if(ch == opening) {

1829         braceLevel++;

1830       } else if(ch == closing) {

1831         braceLevel--;

1832         if(braceLevel == 0) {

1833           break;

1834         }

1835       }

1836       sb.append((char)ch);

1837     }

1838     return sb.toString();

1839   }

1840 

1841   /**

1842    * Handles constructs of type %%(style) and %%class

1843    * 

1844    * @param newLine

1845    * @return

1846    * @throws IOException

1847    */

1848   private Element handleDiv(boolean newLine) throws IOException {

1849     int ch = nextToken();

1850     Element el = null;

1851     if(ch == '%') {

1852       String style = null;

1853       String clazz = null;

1854       ch = nextToken();

1855       //

1856       // Style or class?

1857       //

1858       if(ch == '(') {

1859         style = readBraceContent('(', ')');

1860       } else if(Character.isLetter((char)ch)) {

1861         pushBack(ch);

1862         clazz = readUntil(" \t\n\r");

1863         ch = nextToken();

1864         //

1865         // Pop out only spaces, so that the upcoming EOL check does not check

1866         // the

1867         // next line.

1868         //

1869         if(ch == '\n' || ch == '\r') {

1870           pushBack(ch);

1871         }

1872       } else {

1873         //

1874         // Anything else stops.

1875         //

1876         pushBack(ch);

1877         try {

1878           Boolean isSpan = (Boolean)m_styleStack.pop();

1879           if(isSpan == null) {

1880             // Fail quietly

1881           } else if(isSpan.booleanValue()) {

1882             el = popElement("span");

1883           } else {

1884             el = popElement("div");

1885           }

1886         } catch(EmptyStackException e) {

1887           log.debug("Page '" + "' closes a %%-block that has not been opened.");

1888           return m_currentElement;

1889         }

1890         return el;

1891       }

1892       //

1893       // Check if there is an attempt to do something nasty

1894       //

1895       style = StringEscapeUtils.unescapeHtml(style);

1896       if(style != null && style.indexOf("javascript:") != -1) {

1897         log.debug("Attempt to output javascript within CSS:" + style);

1898         return addElement(makeError("Attempt to output javascript!"));

1899       }

1900       //

1901       // Decide if we should open a div or a span?

1902       //

1903       String eol = peekAheadLine();

1904       if(eol.trim().length() > 0) {

1905         // There is stuff after the class

1906         el = new Element("span");

1907         m_styleStack.push(Boolean.TRUE);

1908       } else {

1909         startBlockLevel();

1910         el = new Element("div");

1911         m_styleStack.push(Boolean.FALSE);

1912       }

1913       if(style != null) el.setAttribute("style", style);

1914       if(clazz != null) el.setAttribute("class", clazz);

1915       el = pushElement(el);

1916       return el;

1917     }

1918     pushBack(ch);

1919     return el;

1920   }

1921 

1922   private Element handleSlash(boolean newLine) throws IOException {

1923     int ch = nextToken();

1924     pushBack(ch);

1925     if(ch == '%' && !m_styleStack.isEmpty()) { return handleDiv(newLine); }

1926     return null;

1927   }

1928 

1929   private Element handleBar(boolean newLine) throws IOException {

1930     Element el = null;

1931     if(!m_istable && !newLine) { return null; }

1932     if(newLine) {

1933       if(!m_istable) {

1934         startBlockLevel();

1935         el = pushElement(new Element("table")

1936                 .setAttribute("class", "wikitable").setAttribute("border", "1"));

1937         m_istable = true;

1938         m_rowNum = 0;

1939       }

1940       m_rowNum++;

1941       Element tr = (m_rowNum % 2 != 0) ? new Element("tr").setAttribute(

1942               "class", "odd") : new Element("tr");

1943       el = pushElement(tr);

1944       // m_closeTag = m_renderer.closeTableItem()+m_renderer.closeTableRow();

1945     }

1946     int ch = nextToken();

1947     if(ch == '|') {

1948       if(!newLine) {

1949         el = popElement("th");

1950       }

1951       el = pushElement(new Element("th"));

1952     } else {

1953       if(!newLine) {

1954         el = popElement("td");

1955       }

1956       el = pushElement(new Element("td"));

1957       pushBack(ch);

1958     }

1959     return el;

1960   }

1961 

1962   /**

1963    * Generic escape of next character or entity.

1964    */

1965   private Element handleTilde() throws IOException {

1966     int ch = nextToken();

1967     if(ch == ' ') return m_currentElement;

1968     if(ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#'

1969             || ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '['

1970             || ch == '{' || ch == ']' || ch == '}' || ch == '%') {

1971       m_plainTextBuf.append((char)ch);

1972       m_plainTextBuf.append(readWhile("" + (char)ch));

1973       return m_currentElement;

1974     }

1975     // No escape.

1976     pushBack(ch);

1977     return null;

1978   }

1979 

1980   private void fillBuffer(Element startElement) throws IOException {

1981     m_currentElement = startElement;

1982     boolean quitReading = false;

1983     boolean newLine = true; // FIXME: not true if reading starts in middle of

1984                             // buffer

1985     disableOutputEscaping();

1986     while(!quitReading) {

1987       int ch = nextToken();

1988       Element el = null;

1989       //

1990       // Check if we're actually ending the preformatted mode.

1991       // We still must do an entity transformation here.

1992       //

1993       if(m_isEscaping) {

1994         if(ch == '}') {

1995           if(handleClosebrace() == null) m_plainTextBuf.append((char)ch);

1996         } else if(ch == -1) {

1997           quitReading = true;

1998         } else if(ch == '\r') {

1999           // DOS line feeds we ignore.

2000         } else if(ch == '<') {

2001           m_plainTextBuf.append("&lt;");

2002         } else if(ch == '>') {

2003           m_plainTextBuf.append("&gt;");

2004         } else if(ch == '&') {

2005           m_plainTextBuf.append("&amp;");

2006         } else if(ch == '~') {

2007           String braces = readWhile("}");

2008           if(braces.length() >= 3) {

2009             m_plainTextBuf.append("}}}");

2010             braces = braces.substring(3);

2011           } else {

2012             m_plainTextBuf.append((char)ch);

2013           }

2014           for(int i = braces.length() - 1; i >= 0; i--) {

2015             pushBack(braces.charAt(i));

2016           }

2017         } else {

2018           m_plainTextBuf.append((char)ch);

2019         }

2020         continue;

2021       }

2022       //

2023       // An empty line stops a list

2024       //

2025       if(newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0) {

2026         m_plainTextBuf.append(unwindGeneralList());

2027       }

2028       if(newLine && ch != '|' && m_istable) {

2029         el = popElement("table");

2030         m_istable = false;

2031       }

2032       //

2033       // Now, check the incoming token.

2034       //

2035       switch(ch){

2036         case '\r':

2037           // DOS linefeeds we forget

2038           continue;

2039         case '\n':

2040           //

2041           // Close things like headings, etc.

2042           //

2043           // FIXME: This is not really very fast

2044           popElement("dl"); // Close definition lists.

2045           popElement("h2");

2046           popElement("h3");

2047           popElement("h4");

2048           if(m_istable) {

2049             popElement("tr");

2050           }

2051           m_isdefinition = false;

2052           if(newLine) {

2053             // Paragraph change.

2054             startBlockLevel();

2055             //

2056             // Figure out which elements cannot be enclosed inside

2057             // a <p></p> pair according to XHTML rules.

2058             //

2059             String nextLine = peekAheadLine();

2060             if(nextLine.length() == 0

2061                     || (nextLine.length() > 0 && !nextLine.startsWith("{{{")

2062                             && !nextLine.startsWith("----")

2063                             && !nextLine.startsWith("%%") && "*#!;"

2064                             .indexOf(nextLine.charAt(0)) == -1)) {

2065               pushElement(new Element("p"));

2066               m_isOpenParagraph = true;

2067               if(m_restartitalic) {

2068                 pushElement(new Element("i"));

2069                 m_isitalic = true;

2070                 m_restartitalic = false;

2071               }

2072               if(m_restartbold) {

2073                 pushElement(new Element("b"));

2074                 m_isbold = true;

2075                 m_restartbold = false;

2076               }

2077             }

2078           } else {

2079             m_plainTextBuf.append("\n");

2080             newLine = true;

2081           }

2082           continue;

2083         case '\\':

2084           el = handleBackslash();

2085           break;

2086         case '_':

2087           el = handleUnderscore();

2088           break;

2089         case '\'':

2090           el = handleApostrophe();

2091           break;

2092         case '{':

2093           el = handleOpenbrace(newLine);

2094           break;

2095         case '}':

2096           el = handleClosebrace();

2097           break;

2098         case '-':

2099           if(newLine) el = handleDash();

2100           break;

2101         case '!':

2102           if(newLine) {

2103             el = handleHeading(null);

2104           }

2105           break;

2106         case ';':

2107           if(newLine) {

2108             el = handleDefinitionList();

2109           }

2110           break;

2111         case ':':

2112           if(m_isdefinition) {

2113             popElement("dt");

2114             el = pushElement(new Element("dd"));

2115             m_isdefinition = false;

2116           }

2117           break;

2118         case '[':

2119           el = handleOpenbracket();

2120           break;

2121         case '*':

2122           if(newLine) {

2123             pushBack('*');

2124             el = handleGeneralList();

2125           }

2126           break;

2127         case '#':

2128           if(newLine) {

2129             pushBack('#');

2130             el = handleGeneralList();

2131           }

2132           break;

2133         case '|':

2134           el = handleBar(newLine);

2135           break;

2136         case '~':

2137           el = handleTilde();

2138           break;

2139         case '%':

2140           el = handleDiv(newLine);

2141           break;

2142         case '/':

2143           el = handleSlash(newLine);

2144           break;

2145         case -1:

2146           quitReading = true;

2147           continue;

2148       }

2149       //

2150       // The idea is as follows: If the handler method returns

2151       // an element (el != null), it is assumed that it has been

2152       // added in the stack. Otherwise the character is added

2153       // as is to the plaintext buffer.

2154       //

2155       // For the transition phase, if s != null, it also gets

2156       // added in the plaintext buffer.

2157       //

2158       if(el != null) {

2159         newLine = false;

2160       } else {

2161         m_plainTextBuf.append((char)ch);

2162         newLine = false;

2163       }

2164     }

2165     popElement("domroot");

2166   }

2167 

2168   public Document parse() throws IOException {

2169     Element rootElement = new Element("html");

2170     Element headElem = new Element("head");

2171     rootElement.addContent(headElem);

2172     Element bodyElem = new Element("body");

2173     rootElement.addContent(bodyElem);

2174     Document d = new Document(rootElement);

2175     try {

2176       fillBuffer(bodyElem);

2177     } catch(IllegalDataException e) {

2178       log.error("Page "

2179               + " contained something that cannot be added in the DOM tree", e);

2180       throw new IOException("Illegal page data: " + e.getMessage());

2181     }

2182     return d;

2183   }

2184 

2185   /**

2186    * Compares two Strings, and if one starts with the other, then returns null.

2187    * Otherwise just like the normal Comparator for strings.

2188    * 

2189    * @author jalkanen

2190    * 

2191    * @since

2192    */

2193   private static class StartingComparator implements Comparator {

2194     public int compare(Object arg0, Object arg1) {

2195       String s1 = (String)arg0;

2196       String s2 = (String)arg1;

2197       if(s1.length() > s2.length()) {

2198         if(s1.startsWith(s2) && s2.length() > 1) return 0;

2199       } else {

2200         if(s2.startsWith(s1) && s1.length() > 1) return 0;

2201       }

2202       return s1.compareTo(s2);

2203     }

2204   }

2205 

2206   private static class Heading {

2207     public static final int HEADING_SMALL = 1;

2208 

2209     public static final int HEADING_MEDIUM = 2;

2210 

2211     public static final int HEADING_LARGE = 3;

2212 

2213     public int m_level;

2214 

2215     public String m_titleText;

2216 

2217     public String m_titleAnchor;

2218 

2219     public String m_titleSection;

2220   }

2221 

2222   private static class CleanTextRenderer {

2223     protected static final Logger log = Logger

2224             .getLogger(CleanTextRenderer.class);

2225 

2226     protected Document m_document;

2227 

2228     public CleanTextRenderer(/* WikiContext context, */Document doc) {

2229       this.m_document = doc;

2230     }

2231 

2232     public String getString() throws IOException {

2233       StringBuffer sb = new StringBuffer();

2234       try {

2235         XPath xp = XPath.newInstance("//text()");

2236         List nodes = xp.selectNodes(m_document.getDocument());

2237         for(Iterator i = nodes.iterator(); i.hasNext();) {

2238           Object el = i.next();

2239           if(el instanceof Text) {

2240             sb.append(((Text)el).getValue());

2241           }

2242         }

2243       } catch(JDOMException e) {

2244         log.error("Could not parse XPATH expression");

2245         throw new IOException(e.getMessage());

2246       }

2247       return sb.toString();

2248     }

2249   }

2250 }