Commit ff1eb3cd authored by m.eik michalke's avatar m.eik michalke
Browse files

merging changes (XiMpLe package) into master

  - was updated in releases/0.6.4
parent 7530590d
......@@ -3,6 +3,13 @@ ChangeLog for package XiMpLe
changes in version 0.03-24 (2015-11-24)
unreleased:
- this release is under development
added:
- new method validXML() for some basic validity checks (WIP)
- new class XiMpLe.validity to define valid child nodes and attributes
- new function is.XiMpLe.validity()
changed:
- moved docimentation of is.XiMpLe.node() and is.XiMpLe.doc() to the
respective classes
changes in version 0.03-23 (2015-11-24)
changed:
......
......@@ -19,17 +19,20 @@ LazyLoad: yes
URL: http://reaktanz.de/?c=hacking&s=XiMpLe
Authors@R: c(person(given="Meik", family="Michalke", email="meik.michalke@hhu.de", role=c("aut", "cre")))
Version: 0.03-24
Date: 2015-11-24
Date: 2015-12-08
RoxygenNote: 5.0.1
Collate:
'00_class_01_XiMpLe.node.R'
'00_class_02_XiMpLe.doc.R'
'00_class_03_XiMpLe.validity.R'
'01_method_01_pasteXML.R'
'XiMpLe-internal.R'
'01_method_02_node.R'
'01_method_03_show.R'
'01_method_04_validXML.R'
'XMLNode.R'
'XMLTree.R'
'XMLValidity.R'
'XiMpLe-package.R'
'parseXMLTree.R'
'pasteXMLTag.R'
......
......@@ -19,17 +19,22 @@ export(XMLNode)
export(XMLScan)
export(XMLScanDeep)
export(XMLTree)
export(XMLValidity)
export(XMLValue)
export(is.XiMpLe.doc)
export(is.XiMpLe.node)
export(is.XiMpLe.validity)
export(node)
export(parseXMLTree)
export(pasteXML)
export(pasteXMLNode)
export(pasteXMLTag)
export(pasteXMLTree)
export(validXML)
exportClasses(XiMpLe.doc)
exportClasses(XiMpLe.node)
exportClasses(XiMpLe.validity)
exportMethods("XMLScan<-")
exportMethods(show)
exportMethods(validXML)
import(methods)
......@@ -16,31 +16,31 @@
# along with XiMpLe. If not, see <http://www.gnu.org/licenses/>.
# Class XiMpLe.node
#
# This class is used to create DOM trees of XML documents, like objects that are returned
# by \code{\link[XiMpLe:parseXMLTree]{parseXMLTree}}.
#
# There are certain special values predefined for the \code{name} slot to easily create special XML elements:
# \describe{
# \item{\code{name=""}}{If the name is an empty character string, a pseudo node is created,
# \code{\link[XiMpLe:pasteXMLNode]{pasteXMLNode}} will paste its \code{value} as plain text.}
# \item{\code{name="!--"}}{Creates a comment tag, i.e., this will comment out all its \code{children}.}
# \item{\code{name="![CDATA["}}{Creates a CDATA section and places all its \code{children} in it.}
# \item{\code{name="*![CDATA["}}{Creates a CDATA section and places all its \code{children} in it, where the CDATA markers are
# commented out by \code{/* */}, as is used for JavaScript in XHTML.}
# }
#
# @slot name Name of the node (i.e., the XML tag identifier). For special names see details.
# @slot attributes A list of named character values, representing the attributes of this node.
# @slot children A list of further objects of class XiMpLe.node, representing child nodes of this node.
# @slot value Plain text to be used as the enclosed value of this node. Set to \code{value=""} if you
# want a childless node to be forced into an non-empty pair of start and end tags by \code{\link[XiMpLe:pasteXMLNode]{pasteXMLNode}}.
# @name XiMpLe.node,-class
# @aliases XiMpLe.node-class XiMpLe.node,-class
#' Class XiMpLe.node
#'
#' This class is used to create DOM trees of XML documents, like objects that are returned
#' by \code{\link[XiMpLe:parseXMLTree]{parseXMLTree}}.
#'
#' There are certain special values predefined for the \code{name} slot to easily create special XML elements:
#' \describe{
#' \item{\code{name=""}}{If the name is an empty character string, a pseudo node is created,
#' \code{\link[XiMpLe:pasteXMLNode]{pasteXMLNode}} will paste its \code{value} as plain text.}
#' \item{\code{name="!--"}}{Creates a comment tag, i.e., this will comment out all its \code{children}.}
#' \item{\code{name="![CDATA["}}{Creates a CDATA section and places all its \code{children} in it.}
#' \item{\code{name="*![CDATA["}}{Creates a CDATA section and places all its \code{children} in it, where the CDATA markers are
#' commented out by \code{/* */}, as is used for JavaScript in XHTML.}
#' }
#'
#' @slot name Name of the node (i.e., the XML tag identifier). For special names see details.
#' @slot attributes A list of named character values, representing the attributes of this node.
#' @slot children A list of further objects of class XiMpLe.node, representing child nodes of this node.
#' @slot value Plain text to be used as the enclosed value of this node. Set to \code{value=""} if you
#' want a childless node to be forced into an non-empty pair of start and end tags by \code{\link[XiMpLe:pasteXMLNode]{pasteXMLNode}}.
#' @name XiMpLe.node,-class
#' @aliases XiMpLe.node-class XiMpLe.node,-class
#' @import methods
# @keywords classes
# @rdname XiMpLe.node-class
#' @keywords classes
#' @rdname XiMpLe.node-class
#' @export
setClass("XiMpLe.node",
......@@ -59,30 +59,30 @@ setClass("XiMpLe.node",
)
setValidity("XiMpLe.node", function(object){
obj.name <- object@name
obj.attributes <- object@attributes
obj.children <- object@children
obj.value <- object@value
obj.name <- slot(object, "name")
obj.attributes <- slot(object, "attributes")
obj.children <- slot(object, "children")
obj.value <- slot(object, "value")
if(isTRUE(!nchar(obj.name) > 0) & isTRUE(!nchar(obj.value) > 0)){
print(str(object))
stop(simpleError("Invalid object: A node must at least have a name or a value!"))
} else {}
if(isTRUE(!nchar(obj.name) > 0) & isTRUE(!nchar(obj.value) > 0)){
print(str(object))
stop(simpleError("Invalid object: A node must at least have a name or a value!"))
} else {}
obj.attributes.names <- names(obj.attributes)
# if there are attributes, check that they all have names
if(length(obj.attributes) > 0){
if(length(obj.attributes) != length(obj.attributes.names)){
stop(simpleError("Invalid object: All attributes must have names!"))
} else {}
obj.attributes.names <- names(obj.attributes)
# if there are attributes, check that they all have names
if(length(obj.attributes) > 0){
if(length(obj.attributes) != length(obj.attributes.names)){
stop(simpleError("Invalid object: All attributes must have names!"))
} else {}
} else {}
# check content of children
if(length(obj.children) > 0){
child.nodes <- sapply(obj.children, function(this.child){is.XiMpLe.node(this.child)})
if(!all(child.nodes)){
stop(simpleError("Invalid object: All list elements of children must be of class XiMpLe.node!"))
} else {}
# check content of children
if(length(obj.children) > 0){
child.nodes <- sapply(obj.children, function(this.child){is.XiMpLe.node(this.child)})
if(!all(child.nodes)){
stop(simpleError("Invalid object: All list elements of children must be of class XiMpLe.node!"))
} else {}
} else {}
return(TRUE)
})
......@@ -16,20 +16,20 @@
# along with XiMpLe. If not, see <http://www.gnu.org/licenses/>.
# Class XiMpLe.doc
#
# This class is used for objects that are returned by \code{\link[XiMpLe:parseXMLTree]{parseXMLTree}}.
#
# @slot file Character string, Name of the file.
# @slot xml A named list, XML declaration of the file.
# @slot dtd A named list, Doctype definition of the file.
# @slot children A list of objects of class XiMpLe.node, representing the DOM structure of the XML document.
# @name XiMpLe.doc,-class
# @aliases XiMpLe.doc-class XiMpLe.doc,-class
#' Class XiMpLe.doc
#'
#' This class is used for objects that are returned by \code{\link[XiMpLe:parseXMLTree]{parseXMLTree}}.
#'
#' @slot file Character string, Name of the file.
#' @slot xml A named list, XML declaration of the file.
#' @slot dtd A named list, Doctype definition of the file.
#' @slot children A list of objects of class XiMpLe.node, representing the DOM structure of the XML document.
#' @name XiMpLe.doc,-class
#' @aliases XiMpLe.doc-class XiMpLe.doc,-class
#' @include 00_class_01_XiMpLe.node.R
#' @import methods
# @keywords classes
# @rdname XiMpLe.doc-class
#' @keywords classes
#' @rdname XiMpLe.doc-class
#' @export
setClass("XiMpLe.doc",
......@@ -48,30 +48,30 @@ setClass("XiMpLe.doc",
)
setValidity("XiMpLe.doc", function(object){
obj.xml <- object@xml
obj.dtd <- object@dtd
obj.children <- object@children
obj.xml <- slot(object, "xml")
obj.dtd <- slot(object, "dtd")
obj.children <- slot(object, "children")
obj.xml.names <- names(obj.xml)
obj.dtd.names <- names(obj.dtd)
# if there are declarations, check that they all have names
if(length(obj.xml) > 0){
if(length(obj.xml) != length(obj.xml.names)){
stop(simpleError("Invalid object: All xml declarations must have names!"))
} else {}
obj.xml.names <- names(obj.xml)
obj.dtd.names <- names(obj.dtd)
# if there are declarations, check that they all have names
if(length(obj.xml) > 0){
if(length(obj.xml) != length(obj.xml.names)){
stop(simpleError("Invalid object: All xml declarations must have names!"))
} else {}
if(length(obj.dtd) > 0){
if(length(obj.dtd) != length(obj.dtd.names)){
stop(simpleError("Invalid object: All doctype declarations must have names!"))
} else {}
} else {}
if(length(obj.dtd) > 0){
if(length(obj.dtd) != length(obj.dtd.names)){
stop(simpleError("Invalid object: All doctype declarations must have names!"))
} else {}
} else {}
# check content of children
if(length(obj.children) > 0){
child.nodes <- sapply(obj.children, function(this.child){is.XiMpLe.node(this.child)})
if(!all(child.nodes)){
stop(simpleError("Invalid object: All list elements of children must be of class XiMpLe.node!"))
} else {}
# check content of children
if(length(obj.children) > 0){
child.nodes <- sapply(obj.children, function(this.child){is.XiMpLe.node(this.child)})
if(!all(child.nodes)){
stop(simpleError("Invalid object: All list elements of children must be of class XiMpLe.node!"))
} else {}
} else {}
return(TRUE)
})
# Copyright 2015 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package XiMpLe.
#
# XiMpLe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# XiMpLe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with XiMpLe. If not, see <http://www.gnu.org/licenses/>.
#' Class XiMpLe.validity
#'
#' Used for objects that describe valid child nodes and attributes of XiMpLe.nodes.
#'
#' You should use \code{\link[XiMpLe:XMLValidity]{XMLValidity}} to create objects of this class.
#'
#' @slot children Named list of character vectors, where the element name defines the parent node
#' name and each character string a valid child node name.
#' @slot attrs Named list of character vectors, where the element name defines the parent node
#' name and each character string a valid attribute name.
#' @slot allChildren Character vector, names of globally valid child nodes for all nodes, if any.
#' @slot allAttrs Character vector, names of globally valid attributes for all nodes, if any.
#' @slot empty Character vector, names of nodes that must be empty nodes (i.e., no closing tag), if any.
#' @name XiMpLe.validity,-class
#' @aliases XiMpLe.validity-class XiMpLe.validity,-class
#' @import methods
#' @keywords classes
#' @seealso
#' \code{\link[XiMpLe:XMLValidity]{XMLValidity}},
#' \code{\link[XiMpLe:validXML]{validXML}}
#' @rdname XiMpLe.validity-class
#' @export
setClass("XiMpLe.validity",
representation=representation(
children="list",
attrs="list",
allChildren="character",
allAttrs="character",
empty="character"
),
prototype(
children=list(),
attrs=list(),
allChildren=character(),
allAttrs=character(),
empty=character()
)
)
setValidity("XiMpLe.validity", function(object){
obj.children <- slot(object, "children")
obj.attrs <- slot(object, "attrs")
for (thisChild in obj.children){
if(!is.character(thisChild)){
stop(simpleError("Invalid object: all \"children\" must be character vectors!"))
} else {}
}
for (thisAttr in obj.attrs){
if(!is.character(thisAttr)){
stop(simpleError("Invalid object: all \"attrs\" must be character vectors!"))
} else {}
}
return(TRUE)
})
# Copyright 2015 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package XiMpLe.
#
# XiMpLe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# XiMpLe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with XiMpLe. If not, see <http://www.gnu.org/licenses/>.
#' Validate S4 objects of XiMpLe XML classes
#'
#' Check whether objects of class \code{\link[XiMpLe:XiMpLe.doc-class]{XiMpLe.doc}}
#' or \code{\link[XiMpLe:XiMpLe.node-class]{XiMpLe.node}} have valid child nodes.
#'
#' XiMpLe can't handle DOM specifications yet, but this method can be used to construct
#' validation schemes.
#'
#' @note: If no \code{parent} is specified, \code{obj} will be checked recursively. If
#'
#' @param obj An object of class \code{XiMpLe.doc} or \code{XiMpLe.node}. If \code{parent=NULL}, this object
#' will be checked for validity, including its child nodes. If \code{parent} is either a character string
#' or another XiMpLe node, it will be checked whether \code{obj} is a valid child node of \code{parent}.
#' @param validity An object of class \code{\link[XiMpLe:XiMpLe.validity-class]{XiMpLe.validity}},
#' see \code{\link[XiMpLe:XMLValidity]{XMLValidity}}.
#' @param parent Either a character string (name of the parent node) or a XiMpLe node, whose name will be used
#' as name of the parent node.
#' @param children Logical, whether child node names should be checked for validity.
#' @param attributes Logical, whether attributes should be checked for validity.
#' @param warn Logical, whether invalid objects should cause a warning or stop with an error.
#' @param section Either a character string (name of the section) or a XiMpLe node, whose name will be used
#' as name of the XML section this check refers to. This is only relevant for warnings and error messages,
#' in case you want to use something different than the actual parent node name.
#' @param caseSens Logical, whether checks should be case sensitive or not.
#' @return Returns \code{TRUE} if tests pass, and depending on the setting of \code{warn} either \code{FALSE} or
#' an error if a test fails.
#' @aliases
#' validXML,-methods
#' validXML,XiMpLe.doc-method
#' validXML,XiMpLe.node-method
#' validXML,XiMpLe.XML-method
#' @seealso
#' \code{\link[XiMpLe:validXML]{validXML}},
#' \code{\link[XiMpLe:XMLValidity]{XMLValidity}},
#' \code{\link[XiMpLe:XiMpLe.doc-class]{XiMpLe.doc}}, and
#' \code{\link[XiMpLe:XiMpLe.node-class]{XiMpLe.node}}
#' @keywords methods
#' @docType methods
#' @export
#' @rdname validXML
#' @include 00_class_01_XiMpLe.node.R
#' @include 00_class_02_XiMpLe.doc.R
setGeneric("validXML", function(obj, validity=XMLValidity(), parent=NULL, children=TRUE, attributes=TRUE, warn=FALSE, section=parent, caseSens=TRUE){standardGeneric("validXML")})
#' @rdname validXML
#' @export
#' @examples
#' HTMLish <- XMLValidity(
#' children=list(
#' body=c("a", "p", "ol", "ul", "strong"),
#' head=c("title"),
#' html=c("head", "body"),
#' li=c("a", "br", "strong"),
#' ol=c("li"),
#' p=c("a", "br", "ol", "ul", "strong"),
#' ul=c("li")
#' ),
#' attrs=list(
#' a=c("href", "name"),
#' p=c("align")
#' ),
#' allChildren=c("!--"),
#' allAttrs=c("id", "class"),
#' empty=c("br")
#' )
#' # make XML object
#' validChildNodes <- XMLNode("html",
#' XMLNode("head",
#' XMLNode("!--", "comment always passes"),
#' XMLNode("title", "test")
#' ),
#' XMLNode("body",
#' XMLNode("p",
#' XMLNode("a", "my link"),
#' XMLNode("br"),
#' "text goes on"
#' )
#' )
#' )
#' invalidChildNodes <- XMLNode("html",
#' XMLNode("head",
#' XMLNode("title",
#' XMLNode("body", "test")
#' )
#' )
#' )
#'
#' # do validity checks
#' # the first should pass
#' validXML(
#' validChildNodes,
#' validity=HTMLish
#' )
#'
#' # now this one should cause a warning and return FALSE
#' validXML(
#' invalidChildNodes,
#' validity=HTMLish,
#' warn=TRUE
#' )
setMethod("validXML", signature(obj="XiMpLe.XML"), function(obj, validity=XMLValidity(), parent=NULL, children=TRUE, attributes=TRUE,
warn=FALSE, section=parent, caseSens=TRUE){
childValidity <- attributeValidity <- emptyValidity <- NULL
if(!is.XiMpLe.validity(validity)){
stop(simpleError(paste0(
"Invalid value for \"validity\": Got class ",
class(validity),
", should be XiMpLe.validity!"))
)
}
# two possibilities:
# a) there's no "parent" value
# we're checking "obj" as the parent node itself
# - check attributes of "obj" directly
# - check child nodes of "obj" for valid node names
# - check if "obj" should be empty but is not
# - recursion: check attributes of child nodes etc.
# b) "parent" is given
# we're checking "obj" as child node for a given parent
# - check if "obj" node name is valid for parent node
# - check attributes of "obj"
# - no recursion
recursion <- FALSE
if(is.null(parent)){
parentName <- XMLName(obj)
nodeChildren <- XMLChildren(obj)
# check for violations of mandatory empty nodes
emptyNodes <- slot(validity, "empty")
if(!isTRUE(caseSens)){
emptyNodes <- tolower(emptyNodes)
} else {}
if(parentName %in% emptyNodes){
if(length(nodeChildren) > 0 | !identical(XMLValue(obj), character())){
return.message <- paste0("Invalid XML node <", parentName, " />: Should be empty, but it isn't!")
if(isTRUE(warn)){
warning(return.message, call.=FALSE)
emptyValidity <- FALSE
} else {
stop(simpleError(return.message))
}
} else {}
} else {}
recursion <- TRUE
} else if(is.XiMpLe.node(parent)){
parentName <- XMLName(parent)
} else if(is.character(parent) & length(parent) == 1){
parentName <- parent
} else {
stop(simpleError(paste0(
"Invalid value for \"parent\": Got class \"",
class(parent),
"\", should be XiMpLe.node or single character string!"))
)
}
if(is.null(section)){
section <- parentName
} else if(is.XiMpLe.node(section)){
section <- XMLName(section)
} else if(!is.character(section) | length(section) != 1){
stop(simpleError(paste0(
"Invalid value for \"section\": Got class \"",
class(section),
"\", should be XiMpLe.node or single character string!"))
)
} else {}
if(isTRUE(children)){
if(isTRUE(recursion)){
# are there any children to check in the first place?
if(length(nodeChildren) > 0){
childValidity <- all(sapply(
nodeChildren,
function(thisChild){
# check child itself
thisChildValidity <- valid.child(
parent=parentName,
children=thisChild,
validity=validity,
warn=warn,
section=section,
caseSens=caseSens
)
# check grandchildren
grandChildValidity <- validXML(
thisChild,
validity=validity,
children=children,
attributes=attributes,
warn=warn,
section=thisChild,
caseSens=caseSens
)
return(all(thisChildValidity, grandChildValidity))
}
))
} else {
childValidity <- NULL
}
} else {
childValidity <- valid.child(
parent=parentName,
children=obj,
validity=validity,
warn=warn,
section=section,
caseSens=caseSens
)
}
} else {}
if(isTRUE(attributes)){
# we only check attributes of "obj"
attributeValidityObj <- valid.attribute(
node=XMLName(obj),
attrs=XMLAttrs(obj),
validity=validity,
warn=warn,
caseSens=caseSens
)
if(isTRUE(recursion) & !isTRUE(children)){
# we can skip this if children was TRUE, because attributes were
# already checked recursively, then. but if not:
# are there any children to check in the first place?
if(length(nodeChildren) > 0){
attributeValidityRecursive <- all(sapply(
nodeChildren,
function(thisChild){
# because of the recursion this checks the attributes of "thisChild"
thisChildValidity <- validXML(
thisChild,
validity=validity,
children=FALSE,
attributes=TRUE,
warn=warn,
section=thisChild,
caseSens=caseSens
)
return(thisChildValidity)
}
))
} else {
attributeValidityRecursive <- NULL
}
} else {
attributeValidityRecursive <- NULL
}
attributeValidity <- all(attributeValidityObj, attributeValidityRecursive)
} else {}
return(all(childValidity, attributeValidity, emptyValidity))
})
# Copyright 2015 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package XiMpLe.