Hets - the Heterogeneous Tool Set

Copyright(c) Christian Maeder, DFKI GmbH 2010
LicenseGPLv2 or higher, see LICENSE.txt
Safe HaskellSafe-Inferred




XPath utilities independent of xml package. references: http://www.w3.org/TR/xpath/ http://www.galiel.net/el/study/XPath_Overview.html http://www.fh-wedel.de/~si/HXmlToolbox/hxpath/diplomarbeit.pdf http://hackage.haskell.org/package/hxt-xpath (modules XPathParser, XPathDataTypes) http://hackage.haskell.org/package/hxt-8.5.0 (modules Text.XML.HXT.DOM.Unicode, Text.XML.HXT.Parser.XmlCharParser) http://www.w3.org/TR/REC-xml/#NT-Name

Unicode is not fully supported. A qualified name is an ncName or two ncNames separated by a colon (different from OWL uris).


data types and pretty printing (via show)

data Axis

axis specifier


Ancestor Bool

or self?

Descendant Bool

or self?

Following Bool


Preceding Bool




allAxis :: [Axis]

all possible values

lowerShow :: Show a => a -> String

utility to show (constant) constructors as lower case strings

showAxis :: Axis -> String

proper string representation (do not use show)

data NodeTest

testing attribute, namespace or element nodes (usually) by name


NameTest String

optional prefix and local part (possibly a * wildcard)

PI String

processing-instruction node type with optional literal


true for any node (therefore rarely used)


true for comment nodes


true for text nodes


nodeTypes :: [NodeTest]

all node types without processing-instruction

pIS :: String

the processing-instruction string

paren :: String -> String

put parens arount a string

showNodeTest :: NodeTest -> String

proper string representation (do not use show)

data Step

the stuff of a path between the slashes


Step Axis NodeTest [Expr]

with predicate list


showStep :: Step -> String

string representation considering abbreviations

isDescOrSelfNode :: Step -> Bool

test for descendant-or-self::node() step

data Path

only absolute paths may be empty


Path Bool [Step]



showSteps :: Bool -> [Step] -> String

show a path abbreviating /descendant-or-self::node()/

data PrimKind

indicator for primary expressions



leading dollar


single or double quotes


digits possibly with decimal point

data Expr

expressions where function calls, unary and infix expressions are generic


GenExpr Bool String [Expr]

infix?, op or fct, and arguments

PathExpr (Maybe Expr) Path

optional filter and path

FilterExpr Expr [Expr]

primary expression with predicates

PrimExpr PrimKind String 


showPred :: Expr -> String

put square brackets around an expression

showExpr :: Expr -> String

show expression with minimal parens

showInfixExpr :: String -> [Expr] -> String

show arguments with minimal parens interspersed with the infix operator. Also treat the unary minus where the argument list is a singleton. Alphanumeric operators are shown with spaces, which looks bad for mod and div in conjunction with additive, relational, or equality operators. The unary minus gets a leading blank if the preceding character is a ncNameChar.

parenExpr :: Bool -> Maybe Int -> Expr -> String

put parens around arguments that have a lower precedence or equal precendence if they are a right argument.

isPrimExpr :: Expr -> Bool

test if expression is primary

infix operators

eqOps :: [String]

unequal (!=) and equal (=)

relOps :: [String]

the four other comparisons

addOps :: [String]

+ and -, where - is allowed within names and as unary operator

multOps :: [String]

*, div and mod, where * is also used as wildcard for node names

inOps :: [[String]]

all infix operators. Lowest precedence for or followed by and, highest is union(|). Only these three operators may get more than two arguments.


skips :: Parser a -> Parser a

skip trailing spaces

symbol :: String -> Parser String

parse keyword and skip spaces

lpar :: Parser ()

skip left paren

rpar :: Parser ()

skip right paren

axis :: Parser Axis

non-abbreviated axis parser

abbrAxis :: Parser Axis

the axis specifier parser

ncNameStart :: Char -> Bool

starting name character (no unicode)

ncNameChar :: Char -> Bool

name character (without +) including centered dot (and no other unicode)

ncName :: Parser String

non-colon xml names (non-skipping)

literal :: Parser String

literal string within single or double quotes (skipping)

localName :: Parser String

ncName or wild-card (*) (skipping)

nodeTest :: Parser NodeTest

the node test parser

abbrStep :: Parser Step

parent or self abbreviated steps

predicate :: Parser Expr

the predicate (expression in square brackets) parser

step :: Parser Step

the step (stuff between slashes) parser

descOrSelfStep :: Step

the implicit descendant-or-self::node() step constant

doubleSlash :: Parser Bool

a double or single slash

slashStep :: Parser [Step]

a step starting with a single or double slash, the latter yielding two steps.

relPath :: Parser [Step]

parse the steps of a relative path

path :: Parser Path

a (possibly empty) absolute or (non-empty) relative path

number :: Parser String

at least one digit and at most one decimal point (skipping)

qualName :: Parser String

a qualified name (prefixed or unprefixed)

primExpr :: Parser Expr

parse a primary expression (including fct or expr in parens)

fct :: Parser Expr

parse a function call by checking the qname and the left paren

filterExpr :: Parser Expr

parse a filter expresssion as primary expression followed by predicates

pathExpr :: Parser Expr

a path expression is either a filter expression followed by a (non-empty) absoulte path or an ordinary path.

singleInfixExpr :: Parser Expr -> String -> Parser Expr

parse multiple argument expressions separated by an infix symbol

unionExpr :: Parser Expr

pathExpr are arguments of union expression

unaryExpr :: Parser Expr

unionExpr can be prefixed by the unary minus

leftAssocExpr :: Parser Expr -> [String] -> Parser Expr

parse as many arguments separated by any infix symbol as possible but construct left-associative binary application trees.

all final infix parsers using leftAssocExpr or singleInfixExpr

expr :: Parser Expr

the top-level expressions interspersed by or.

checking sanity of paths

isElementNode :: Step -> Bool

may this step have further steps

type FctEnv = [(String, (BasicType, [BasicType]))]

type VarEnv = [(String, BasicType)]

parseExpr :: String -> Either String Expr

parse string