Class HtmlTreeBuilder

java.lang.Object
org.jsoup.parser.TreeBuilder
org.jsoup.parser.HtmlTreeBuilder

public class HtmlTreeBuilder extends TreeBuilder
HTML Tree Builder; creates a DOM from Tokens.
  • Field Details

    • TagsSearchInScope

      static final String[] TagsSearchInScope
    • TagSearchList

      static final String[] TagSearchList
    • TagSearchButton

      static final String[] TagSearchButton
    • TagSearchTableScope

      static final String[] TagSearchTableScope
    • TagSearchSelectScope

      static final String[] TagSearchSelectScope
    • TagSearchEndTags

      static final String[] TagSearchEndTags
    • TagThoroughSearchEndTags

      static final String[] TagThoroughSearchEndTags
    • TagSearchSpecial

      static final String[] TagSearchSpecial
    • TagMathMlTextIntegration

      static final String[] TagMathMlTextIntegration
    • TagSvgHtmlIntegration

      static final String[] TagSvgHtmlIntegration
    • MaxScopeSearchDepth

      public static final int MaxScopeSearchDepth
      See Also: Constant Field Values
    • state

      private HtmlTreeBuilderState state
    • originalState

      private HtmlTreeBuilderState originalState
    • baseUriSetFromDoc

      private boolean baseUriSetFromDoc
    • headElement

      private Element headElement
    • formElement

      private FormElement formElement
    • contextElement

      private Element contextElement
    • formattingElements

      private ArrayList<Element> formattingElements
    • tmplInsertMode

      private ArrayList<HtmlTreeBuilderState> tmplInsertMode
    • pendingTableCharacters

      private List<Token.Character> pendingTableCharacters
    • emptyEnd

      private Token.EndTag emptyEnd
    • framesetOk

      private boolean framesetOk
    • fosterInserts

      private boolean fosterInserts
    • fragmentParsing

      private boolean fragmentParsing
    • maxQueueDepth

      private static final int maxQueueDepth
      See Also: Constant Field Values
    • specificScopeTarget

      private final String[] specificScopeTarget
    • maxUsedFormattingElements

      private static final int maxUsedFormattingElements
      See Also: Constant Field Values
  • Constructor Details

    • HtmlTreeBuilder

      public HtmlTreeBuilder()
  • Method Details

    • defaultSettings

      ParseSettings defaultSettings()
      Specified by:
      defaultSettings in class TreeBuilder
    • newInstance

      HtmlTreeBuilder newInstance()
      Description copied from class: TreeBuilder
      Create a new copy of this TreeBuilder
      Specified by:
      newInstance in class TreeBuilder
      Returns:
      copy, ready for a new parse
    • initialiseParse

      protected void initialiseParse(Reader input, String baseUri, Parser parser)
      Overrides:
      initialiseParse in class TreeBuilder
    • initialiseParseFragment

      void initialiseParseFragment(Element context)
      Overrides:
      initialiseParseFragment in class TreeBuilder
    • completeParseFragment

      List<Node> completeParseFragment()
      Specified by:
      completeParseFragment in class TreeBuilder
    • process

      protected boolean process(Token token)
      Specified by:
      process in class TreeBuilder
    • useCurrentOrForeignInsert

      boolean useCurrentOrForeignInsert(Token token)
    • isMathmlTextIntegration

      static boolean isMathmlTextIntegration(Element el)
    • isHtmlIntegration

      static boolean isHtmlIntegration(Element el)
    • process

      boolean process(Token token, HtmlTreeBuilderState state)
    • transition

      void transition(HtmlTreeBuilderState state)
    • state

      HtmlTreeBuilderState state()
    • markInsertionMode

      void markInsertionMode()
    • originalState

      HtmlTreeBuilderState originalState()
    • framesetOk

      void framesetOk(boolean framesetOk)
    • framesetOk

      boolean framesetOk()
    • getDocument

      Document getDocument()
    • getBaseUri

      String getBaseUri()
    • maybeSetBaseUri

      void maybeSetBaseUri(Element base)
    • isFragmentParsing

      boolean isFragmentParsing()
    • error

      void error(HtmlTreeBuilderState state)
    • createElementFor

      Element createElementFor(Token.StartTag startTag, String namespace, boolean forcePreserveCase)
    • insertElementFor

      Element insertElementFor(Token.StartTag startTag)
      Inserts an HTML element for the given tag.
    • insertForeignElementFor

      Element insertForeignElementFor(Token.StartTag startTag, String namespace)
      Inserts a foreign element. Preserves the case of the tag name and of the attributes.
    • insertEmptyElementFor

      Element insertEmptyElementFor(Token.StartTag startTag)
    • insertFormElement

      FormElement insertFormElement(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack)
    • doInsertElement

      private void doInsertElement(Element el, Token token)
      Inserts the Element onto the stack. All element inserts must run through this method. Performs any general tests on the Element before insertion.
      Parameters:
      el - the Element to insert and make the current element
      token - the token this element was parsed from. If null, a zero-width current token is used, as for an intrinsic insert.
    • insertCommentNode

      void insertCommentNode(Token.Comment token)
    • insertCharacterNode

      void insertCharacterNode(Token.Character characterToken)
      Inserts the provided character token into the current element.
    • insertCharacterToElement

      void insertCharacterToElement(Token.Character characterToken, Element el)
      Inserts the provided character token into the provided element.
    • getStack

      ArrayList<Element> getStack()
    • onStack

      boolean onStack(Element el)
    • onStack

      boolean onStack(String elName)
      Checks if there is an HTML element with the given name on the stack.
    • onStack

      private static boolean onStack(ArrayList<Element> queue, Element element)
    • getFromStack

      Element getFromStack(String elName)
      Gets the nearest (lowest) HTML element with the given name from the stack.
    • removeFromStack

      boolean removeFromStack(Element el)
    • popStackToClose

      Element popStackToClose(String elName)
      Pops the stack until the given HTML element is removed.
    • popStackToCloseAnyNamespace

      Element popStackToCloseAnyNamespace(String elName)
      Pops the stack until an element with the supplied name is removed, irrespective of namespace.
    • popStackToClose

      void popStackToClose(String... elNames)
      Pops the stack until one of the given HTML elements is removed.
    • clearStackToTableContext

      void clearStackToTableContext()
    • clearStackToTableBodyContext

      void clearStackToTableBodyContext()
    • clearStackToTableRowContext

      void clearStackToTableRowContext()
    • clearStackToContext

      private void clearStackToContext(String... nodeNames)
      Removes elements from the stack until one of the supplied HTML elements is removed.
    • aboveOnStack

      Element aboveOnStack(Element el)
    • insertOnStackAfter

      void insertOnStackAfter(Element after, Element in)
    • replaceOnStack

      void replaceOnStack(Element out, Element in)
    • replaceInQueue

      private static void replaceInQueue(ArrayList<Element> queue, Element out, Element in)
    • resetInsertionMode

      boolean resetInsertionMode()
      Reset the insertion mode, by searching up the stack for an appropriate insertion mode. The stack search depth is limited to maxQueueDepth.
      Returns:
      true if the insertion mode was actually changed.
    • resetBody

      void resetBody()
      Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content arrives.
    • inSpecificScope

      private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes)
    • inSpecificScope

      private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes)
    • inScope

      boolean inScope(String[] targetNames)
    • inScope

      boolean inScope(String targetName)
    • inScope

      boolean inScope(String targetName, String[] extras)
    • inListItemScope

      boolean inListItemScope(String targetName)
    • inButtonScope

      boolean inButtonScope(String targetName)
    • inTableScope

      boolean inTableScope(String targetName)
    • inSelectScope

      boolean inSelectScope(String targetName)
    • onStackNot

      boolean onStackNot(String[] allowedTags)
      Tests if there is some element on the stack that is not in the provided set.
    • setHeadElement

      void setHeadElement(Element headElement)
    • getHeadElement

      Element getHeadElement()
    • isFosterInserts

      boolean isFosterInserts()
    • setFosterInserts

      void setFosterInserts(boolean fosterInserts)
    • getFormElement

      FormElement getFormElement()
    • setFormElement

      void setFormElement(FormElement formElement)
    • resetPendingTableCharacters

      void resetPendingTableCharacters()
    • getPendingTableCharacters

      List<Token.Character> getPendingTableCharacters()
    • addPendingTableCharacters

      void addPendingTableCharacters(Token.Character c)
    • generateImpliedEndTags

      void generateImpliedEndTags(String excludeTag)
      Implements section 13.2.6.3 of the HTML specification, "Closing elements that have implied end tags": When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements. If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list. When the steps below require the UA to generate all implied end tags thoroughly, then, while the current node is a caption element, a colgroup element, a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, an rtc element, a tbody element, a td element, a tfoot element, a th element, a thead element, or a tr element, the UA must pop the current node off the stack of open elements.
      Parameters:
      excludeTag - If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list.
    • generateImpliedEndTags

      void generateImpliedEndTags()
    • generateImpliedEndTags

      void generateImpliedEndTags(boolean thorough)
      Pops HTML elements off the stack according to the implied end tag rules.
      Parameters:
      thorough - whether to generate implied end tags thoroughly (which also pops table elements such as caption, colgroup, tbody, td, tfoot, th, thead and tr) or not
    • closeElement

      void closeElement(String name)
    • isSpecial

      static boolean isSpecial(Element el)
    • lastFormattingElement

      Element lastFormattingElement()
    • positionOfElement

      int positionOfElement(Element el)
    • removeLastFormattingElement

      Element removeLastFormattingElement()
    • pushActiveFormattingElements

      void pushActiveFormattingElements(Element in)
    • pushWithBookmark

      void pushWithBookmark(Element in, int bookmark)
    • checkActiveFormattingElements

      void checkActiveFormattingElements(Element in)
    • isSameFormattingElement

      private static boolean isSameFormattingElement(Element a, Element b)
    • reconstructFormattingElements

      void reconstructFormattingElements()
    • clearFormattingElementsToLastMarker

      void clearFormattingElementsToLastMarker()
    • removeFromActiveFormattingElements

      void removeFromActiveFormattingElements(Element el)
    • isInActiveFormattingElements

      boolean isInActiveFormattingElements(Element el)
    • getActiveFormattingElement

      Element getActiveFormattingElement(String nodeName)
    • replaceActiveFormattingElement

      void replaceActiveFormattingElement(Element out, Element in)
    • insertMarkerToFormattingElements

      void insertMarkerToFormattingElements()
    • insertInFosterParent

      void insertInFosterParent(Node in)
    • pushTemplateMode

      void pushTemplateMode(HtmlTreeBuilderState state)
    • popTemplateMode

      HtmlTreeBuilderState popTemplateMode()
    • templateModeSize

      int templateModeSize()
    • currentTemplateMode

      HtmlTreeBuilderState currentTemplateMode()
    • toString

      public String toString()
      Overrides:
      toString in class Object
    • isContentForTagData

      protected boolean isContentForTagData(String normalName)
      Description copied from class: TreeBuilder
      (An internal method, visible for Element. For HTML parse, signals that script and style text should be treated as Data Nodes).
      Overrides:
      isContentForTagData in class TreeBuilder