<?php
/**
* @package dompdf
* @link http://dompdf.github.com/
* @author Benj Carson <benjcarson@digitaljunkies.ca>
* @author Helmut Tischer <htischer@weihenstephan.org>
* @author Fabien Ménager <fabien.menager@gmail.com>
* @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License
*/
namespace Dompdf\Css;
use DOMElement;
use DOMXPath;
use Dompdf\Dompdf;
use Dompdf\Helpers;
use Dompdf\Exception;
use Dompdf\FontMetrics;
use Dompdf\Frame\FrameTree;
/**
* The master stylesheet class
*
* The Stylesheet class is responsible for parsing stylesheets and style
* tags/attributes. It also acts as a registry of the individual Style
* objects generated by the current set of loaded CSS files and style
* elements.
*
* @see Style
* @package dompdf
*/
class Stylesheet
{
/**
* The location of the default built-in CSS file.
*/
const DEFAULT_STYLESHEET = "/lib/res/html.css";
/**
* User agent stylesheet origin
*
* @var int
*/
const ORIG_UA = 1;
/**
* User normal stylesheet origin
*
* @var int
*/
const ORIG_USER = 2;
/**
* Author normal stylesheet origin
*
* @var int
*/
const ORIG_AUTHOR = 3;
/*
* The highest possible specificity is 0x01000000 (and that is only for author
* stylesheets, as it is for inline styles). Origin precedence can be achieved by
* adding multiples of 0x10000000 to the actual specificity. Important
* declarations are handled in Style; though technically they should be handled
* here so that user important declarations can be made to take precedence over
* author important declarations, this doesn't matter in practice as Dompdf does
* not support user stylesheets, and user agent stylesheets can not include
* important declarations.
*/
private static $_stylesheet_origins = [
self::ORIG_UA => 0x00000000, // user agent declarations
self::ORIG_USER => 0x10000000, // user normal declarations
self::ORIG_AUTHOR => 0x30000000, // author normal declarations
];
/**
* Non-CSS presentational hints (i.e. HTML 4 attributes) are handled as if added
* to the beginning of an author stylesheet, i.e. anything in author stylesheets
* should override them.
*/
const SPEC_NON_CSS = 0x20000000;
/**
* Current dompdf instance
*
* @var Dompdf
*/
private $_dompdf;
/**
* Array of currently defined styles
*
* @var Style[]
*/
private $_styles;
/**
* Base protocol of the document being parsed
* Used to handle relative urls.
*
* @var string
*/
private $_protocol = "";
/**
* Base hostname of the document being parsed
* Used to handle relative urls.
*
* @var string
*/
private $_base_host = "";
/**
* Base path of the document being parsed
* Used to handle relative urls.
*
* @var string
*/
private $_base_path = "";
/**
* The styles defined by @page rules
*
* @var array<Style>
*/
private $_page_styles;
/**
* List of loaded files, used to prevent recursion
*
* @var array
*/
private $_loaded_files;
/**
* Current stylesheet origin
*
* @var int
*/
private $_current_origin = self::ORIG_UA;
/**
* Accepted CSS media types
* List of types and parsing rules for future extensions:
* http://www.w3.org/TR/REC-html40/types.html
* screen, tty, tv, projection, handheld, print, braille, aural, all
* The following are non standard extensions for undocumented specific environments.
* static, visual, bitmap, paged, dompdf
* Note, even though the generated pdf file is intended for print output,
* the desired content might be different (e.g. screen or projection view of html file).
* Therefore allow specification of content by dompdf setting Options::defaultMediaType.
* If given, replace media "print" by Options::defaultMediaType.
* (Previous version $ACCEPTED_MEDIA_TYPES = $ACCEPTED_GENERIC_MEDIA_TYPES + $ACCEPTED_DEFAULT_MEDIA_TYPE)
*/
static $ACCEPTED_DEFAULT_MEDIA_TYPE = "print";
static $ACCEPTED_GENERIC_MEDIA_TYPES = ["all", "static", "visual", "bitmap", "paged", "dompdf"];
static $VALID_MEDIA_TYPES = ["all", "aural", "bitmap", "braille", "dompdf", "embossed", "handheld", "paged", "print", "projection", "screen", "speech", "static", "tty", "tv", "visual"];
/**
* @var FontMetrics
*/
private $fontMetrics;
/**
* The class constructor.
*
* The base protocol, host & path are initialized to those of
* the current script.
*/
public function __construct(Dompdf $dompdf)
{
    // Remember the owning Dompdf instance and reuse its font metrics.
    $this->_dompdf = $dompdf;
    $this->setFontMetrics($dompdf->getFontMetrics());

    // Nothing has been parsed or loaded yet.
    $this->_styles = [];
    $this->_loaded_files = [];

    // Seed the base protocol/host/path from the running script; fall back to
    // this very file when the server did not expose SCRIPT_FILENAME.
    $script = $_SERVER["SCRIPT_FILENAME"] ?? __FILE__;
    [$this->_protocol, $this->_base_host, $this->_base_path] = Helpers::explode_url($script);

    // A "base" page style is always present.
    $this->_page_styles = ["base" => new Style($this)];
}
/**
* Set the base protocol
*
* @param string $protocol
*/
public function set_protocol(string $protocol)
{
    // Replace the base protocol (used to handle relative urls).
    $this->_protocol = $protocol;
}
/**
* Set the base host
*
* @param string $host
*/
public function set_host(string $host)
{
    // Replace the base hostname (used to handle relative urls).
    $this->_base_host = $host;
}
/**
* Set the base path
*
* @param string $path
*/
public function set_base_path(string $path)
{
    // Replace the base path (used to handle relative urls).
    $this->_base_path = $path;
}
/**
* Return the Dompdf object
*
* @return Dompdf
*/
public function get_dompdf()
{
    // The Dompdf instance handed to the constructor.
    $dompdf = $this->_dompdf;

    return $dompdf;
}
/**
* Return the base protocol for this stylesheet
*
* @return string
*/
public function get_protocol()
{
    // Current base protocol of this stylesheet.
    $protocol = $this->_protocol;

    return $protocol;
}
/**
* Return the base host for this stylesheet
*
* @return string
*/
public function get_host()
{
    // Current base hostname of this stylesheet.
    $host = $this->_base_host;

    return $host;
}
/**
* Return the base path for this stylesheet
*
* @return string
*/
public function get_base_path()
{
    // Current base path of this stylesheet.
    $path = $this->_base_path;

    return $path;
}
/**
* Return the array of page styles
*
* @return Style[]
*/
public function get_page_styles()
{
    // Styles collected from @page rules, keyed by page selector ("base", ":first", ...).
    $page_styles = $this->_page_styles;

    return $page_styles;
}
/**
* Create a new Style object associated with this stylesheet
*
* @return Style
*/
public function create_style(): Style
{
    // New styles start out with whatever origin is currently being loaded.
    $origin = $this->_current_origin;

    return new Style($this, $origin);
}
/**
* Add a new Style object to the stylesheet
*
* The style's origin is changed to the current origin of the stylesheet.
*
* @param string $key the Style's selector
* @param Style $style the Style to be added
*/
public function add_style(string $key, Style $style): void
{
    // Make sure a bucket exists for this selector before appending to it.
    $this->_styles[$key] = $this->_styles[$key] ?? [];

    // The style always takes on the origin that is currently being loaded.
    $style->set_origin($this->_current_origin);

    $this->_styles[$key][] = $style;
}
/**
* load and parse a CSS string
*
* @param string $css
* @param int $origin
*/
public function load_css(&$css, $origin = self::ORIG_AUTHOR)
{
    // A falsy origin leaves the currently active origin untouched.
    $this->_current_origin = $origin ?: $this->_current_origin;

    $this->_parse_css($css);
}
/**
* load and parse a CSS file
*
* @param string $file
* @param int $origin
*/
public function load_css_file($file, $origin = self::ORIG_AUTHOR)
{
    // Loads a stylesheet from a file path, URL or data: URI and parses it.
    // Failures are reported through Helpers::record_warnings() and the
    // stylesheet is skipped; nothing is returned.

    if ($origin) {
        $this->_current_origin = $origin;
    }

    // Prevent circular references
    if (isset($this->_loaded_files[$file])) {
        return;
    }

    $this->_loaded_files[$file] = true;

    if (strpos($file, "data:") === 0) {
        $parsed = Helpers::parse_data_uri($file);

        // Do not index into the result unless parsing actually produced a
        // "data" entry; otherwise a malformed data URI would feed a non-string
        // value into the CSS parser.
        // NOTE(review): the exact failure value of parse_data_uri() is not
        // visible from here; isset() covers false/null/missing key alike.
        if (!isset($parsed["data"])) {
            Helpers::record_warnings(E_USER_WARNING, "Unable to load css file: malformed data URI", __FILE__, __LINE__);
            return;
        }

        $css = $parsed["data"];
    } else {
        $options = $this->_dompdf->getOptions();

        $parsed_url = Helpers::explode_url($file);
        $protocol = $parsed_url["protocol"];

        // The built-in default stylesheet is exempt from the protocol checks.
        if ($file !== $this->getDefaultStylesheet()) {
            $allowed_protocols = $options->getAllowedProtocols();
            if (!array_key_exists($protocol, $allowed_protocols)) {
                Helpers::record_warnings(E_USER_WARNING, "Permission denied on $file. The communication protocol is not supported.", __FILE__, __LINE__);
                return;
            }

            // Every validation rule registered for the protocol must accept the file.
            foreach ($allowed_protocols[$protocol]["rules"] as $rule) {
                [$result, $message] = $rule($file);
                if (!$result) {
                    Helpers::record_warnings(E_USER_WARNING, "Error loading $file: $message", __FILE__, __LINE__);
                    return;
                }
            }
        }

        [$css, $http_response_header] = Helpers::getFileContent($file, $this->_dompdf->getHttpContext());

        $good_mime_type = true;

        // See http://the-stickman.com/web-development/php/getting-http-response-headers-when-using-file_get_contents/
        // Outside quirks mode, any Content-Type header other than text/css
        // rejects the file.
        if (isset($http_response_header) && !$this->_dompdf->getQuirksmode()) {
            foreach ($http_response_header as $_header) {
                if (preg_match("@Content-Type:\s*([\w/]+)@i", $_header, $matches) &&
                    ($matches[1] !== "text/css")
                ) {
                    $good_mime_type = false;
                }
            }
        }

        if (!$good_mime_type || $css === null) {
            Helpers::record_warnings(E_USER_WARNING, "Unable to load css file $file", __FILE__, __LINE__);
            return;
        }

        // Relative URLs inside the loaded file resolve against its own location.
        [$this->_protocol, $this->_base_host, $this->_base_path] = $parsed_url;
    }

    $this->_parse_css($css);
}
/**
* @link http://www.w3.org/TR/CSS21/cascade.html#specificity
*
* @param string $selector
* @param int $origin :
* - Stylesheet::ORIG_UA: user agent style sheet
* - Stylesheet::ORIG_USER: user style sheet
* - Stylesheet::ORIG_AUTHOR: author style sheet
*
* @return int
*/
private function _specificity($selector, $origin = self::ORIG_AUTHOR)
{
    // http://www.w3.org/TR/CSS21/cascade.html#specificity
    // Pseudo-class modifiers (":") are not counted here, just as they are
    // ignored in _css_selector_to_xpath.

    // a: only the synthetic "!attr" selector (inline style attribute) scores here
    $inline = ($selector === "!attr") ? 1 : 0;

    // b: id selectors
    $ids = min(mb_substr_count($selector, "#"), 255);

    // c: class and attribute selectors
    $class_hits = mb_substr_count($selector, ".") + mb_substr_count($selector, "[");
    $classes = min($class_hits, 255);

    // d: element names, approximated by counting combinators; "~=" is an
    // attribute operator, so its "~" is subtracted again
    $combinator_hits = 0;
    foreach ([" ", ">", "+", "~"] as $combinator) {
        $combinator_hits += mb_substr_count($selector, $combinator);
    }
    $combinator_hits -= mb_substr_count($selector, "~=");
    $elements = min($combinator_hits, 255);

    // A selector that starts with a plain element name has no leading
    // combinator to count (leading whitespace may have been collapsed away,
    // see _css_selector_to_xpath), so account for that element here.
    $leading_marks = [" ", ">", ".", "#", "+", "~", ":", "["];
    if ($selector !== "*" && !in_array($selector[0], $leading_marks)) {
        $elements++;
    }

    // Pack the four components into one integer and shift it by the origin offset.
    $packed = ($inline << 24) | ($ids << 16) | ($classes << 8) | ($elements);
    $specificity = self::$_stylesheet_origins[$origin] + $packed;

    if ($this->_dompdf->getOptions()->getDebugCss()) {
        /*DEBUGCSS*/
        print "<pre>\n";
        /*DEBUGCSS*/
        printf("_specificity(): 0x%08x \"%s\"\n", $specificity, $selector);
        /*DEBUGCSS*/
        print "</pre>";
    }

    return $specificity;
}
/**
* Converts a CSS selector to an XPath query.
*
* @param string $selector
* @param bool $first_pass
*
* @throws Exception
* @return array
*/
private function _css_selector_to_xpath(string $selector, bool $first_pass = false): array
{
    // Translates a CSS selector into an XPath 1.0 query by scanning the
    // selector delimiter by delimiter and appending one XPath step or
    // predicate per token.
    //
    // Returns ['query' => string, 'pseudo_elements' => array<string, true>].
    // When $first_pass is true, the trailing "/*[@before]" / "/*[@after]" step
    // for :before/:after is not appended.
    // Throws Exception on a malformed attribute selector.
    //
    // The scanner addresses $selector and $tok byte by byte, so every length
    // and offset used for indexing is a byte count (strlen/substr). All
    // delimiters are ASCII, which keeps multi-byte UTF-8 sequences intact
    // inside tokens.

    // Initial query (non-absolute)
    $query = "//";

    // Will contain :before and :after
    $pseudo_elements = [];

    // Will contain :link, etc
    $pseudo_classes = [];

    // Characters that end the current token and select the handler below
    $delimiters = [" ", ">", ".", "#", "+", "~", ":", "[", "("];

    // Add an implicit * at the beginning of the selector
    // if it begins with an attribute selector
    if ($selector !== "" && $selector[0] === "[") {
        $selector = "*$selector";
    }

    // Add an implicit space at the beginning of the selector if there is no
    // delimiter there already.
    if ($selector === "" || !in_array($selector[0], $delimiters, true)) {
        $selector = " $selector";
    }

    // Byte length: must agree with the byte-wise indexing below, otherwise
    // selectors containing multi-byte characters get cut short.
    $len = strlen($selector);
    $i = 0;

    while ($i < $len) {
        // $s is the delimiter that introduces the upcoming token
        $s = $selector[$i];
        $i++;

        // Eat characters up to the next delimiter
        $tok = "";
        $in_attr = false;
        $in_func = false;

        while ($i < $len) {
            $c = $selector[$i];
            $c_prev = $selector[$i - 1];

            // A delimiter ends the token unless it repeats the previous
            // character; this keeps "::" (pseudo-element syntax) inside the token.
            // NOTE(review): the same exemption applies to any doubled delimiter
            // (e.g. two spaces); kept as-is to preserve existing tokenisation.
            if (!$in_func && !$in_attr && in_array($c, $delimiters, true) && $c !== $c_prev) {
                break;
            }

            if ($c_prev === "[") {
                $in_attr = true;
            }
            if ($c_prev === "(") {
                $in_func = true;
            }

            $tok .= $selector[$i++];

            // The closing bracket/parenthesis belongs to the token and ends it
            if ($in_attr && $c === "]") {
                $in_attr = false;
                break;
            }
            if ($in_func && $c === ")") {
                $in_func = false;
                break;
            }
        }

        switch ($s) {
            case " ":
            case ">":
                // Descendant (" ") or child (">") combinator: all elements
                // matching the next token below the current one
                $expr = $s === " " ? "descendant" : "child";

                if (mb_substr($query, -1, 1) !== "/") {
                    $query .= "/";
                }

                // Tag names are case-insensitive
                $tok = strtolower($tok);

                if (!$tok) {
                    $tok = "*";
                }

                $query .= "$expr::$tok";
                $tok = "";
                break;

            case ".":
            case "#":
                // All elements matching the current token with a class/id equal to
                // the _next_ token.
                $attr = $s === "." ? "class" : "id";

                // empty class/id == *
                if (mb_substr($query, -1, 1) === "/") {
                    $query .= "*";
                }

                // Match multiple classes: $tok contains the current selected
                // class. libxml only supports XPath 1.0 (no matches()), so the
                // value is tested as a space-delimited word instead.
                $query .= "[contains(concat(' ', normalize-space(@$attr), ' '), concat(' ', '$tok', ' '))]";
                $tok = "";
                break;

            case "+":
            case "~":
                // Next-sibling combinator
                // Subsequent-sibling combinator
                // https://www.w3.org/TR/selectors-3/#sibling-combinators
                if (mb_substr($query, -1, 1) !== "/") {
                    $query .= "/";
                }

                // Tag names are case-insensitive
                $tok = strtolower($tok);

                if (!$tok) {
                    $tok = "*";
                }

                $query .= "following-sibling::$tok";
                if ($s === "+") {
                    $query .= "[1]";
                }

                $tok = "";
                break;

            case ":":
                $i2 = $i - strlen($tok) - 2; // the char before ":"
                if (($i2 < 0 || !isset($selector[$i2]) || (in_array($selector[$i2], $delimiters, true) && $selector[$i2] !== ":")) && substr($query, -1) !== "*") {
                    $query .= "*";
                }

                $last = false;

                // Pseudo-classes
                switch ($tok) {
                    case "first-child":
                        $query .= "[not(preceding-sibling::*)]";
                        $tok = "";
                        break;

                    case "last-child":
                        $query .= "[not(following-sibling::*)]";
                        $tok = "";
                        break;

                    case "first-of-type":
                        $query .= "[position() = 1]";
                        $tok = "";
                        break;

                    case "last-of-type":
                        $query .= "[position() = last()]";
                        $tok = "";
                        break;

                    // an+b, n, odd, and even
                    /** @noinspection PhpMissingBreakStatementInspection */
                    case "nth-last-of-type":
                        $last = true;
                    case "nth-of-type":
                        //FIXME: this fix-up is pretty ugly, would parsing the selector in reverse work better generally?
                        $descendant_delimeter = strrpos($query, "::");
                        $isChild = substr($query, $descendant_delimeter - 5, 5) === "child";
                        $el = substr($query, $descendant_delimeter + 2);
                        $query = substr($query, 0, strrpos($query, "/")) . ($isChild ? "/" : "//") . $el;

                        $pseudo_classes[$tok] = true;

                        // $i sits on "(", the argument runs up to the next ")".
                        // Both offsets are byte offsets, hence substr().
                        $p = $i + 1;
                        $nth = trim(substr($selector, $p, strpos($selector, ")", $i) - $p));
                        $position = $last ? "(last()-position()+1)" : "position()";

                        // 1
                        if (preg_match("/^\d+$/", $nth)) {
                            $condition = "$position = $nth";
                        } // odd
                        elseif ($nth === "odd") {
                            $condition = "($position mod 2) = 1";
                        } // even
                        elseif ($nth === "even") {
                            $condition = "($position mod 2) = 0";
                        } // an+b
                        else {
                            $condition = $this->_selector_an_plus_b($nth, $last);
                        }

                        $query .= "[$condition]";
                        $tok = "";
                        break;

                    /** @noinspection PhpMissingBreakStatementInspection */
                    case "nth-last-child":
                        $last = true;
                    case "nth-child":
                        //FIXME: this fix-up is pretty ugly, would parsing the selector in reverse work better generally?
                        $descendant_delimeter = strrpos($query, "::");
                        $isChild = substr($query, $descendant_delimeter - 5, 5) === "child";
                        $el = substr($query, $descendant_delimeter + 2);
                        $query = substr($query, 0, strrpos($query, "/")) . ($isChild ? "/" : "//") . "*";

                        $pseudo_classes[$tok] = true;

                        // $i sits on "(", the argument runs up to the next ")".
                        // Both offsets are byte offsets, hence substr().
                        $p = $i + 1;
                        $nth = trim(substr($selector, $p, strpos($selector, ")", $i) - $p));
                        $position = $last ? "(last()-position()+1)" : "position()";

                        // 1
                        if (preg_match("/^\d+$/", $nth)) {
                            $condition = "$position = $nth";
                        } // odd
                        elseif ($nth === "odd") {
                            $condition = "($position mod 2) = 1";
                        } // even
                        elseif ($nth === "even") {
                            $condition = "($position mod 2) = 0";
                        } // an+b
                        else {
                            $condition = $this->_selector_an_plus_b($nth, $last);
                        }

                        // Position is evaluated among all siblings; the element
                        // name is re-applied afterwards as a separate predicate.
                        $query .= "[$condition]";
                        if ($el !== "*") {
                            $query .= "[name() = '$el']";
                        }
                        $tok = "";
                        break;

                    //TODO: bit of a hack attempt at matches support, currently only matches against elements
                    case "matches":
                        $pseudo_classes[$tok] = true;

                        $p = $i + 1;
                        $matchList = trim(substr($selector, $p, strpos($selector, ")", $i) - $p));

                        // Tag names are case-insensitive
                        $elements = array_map(
                            static function ($name) {
                                return "name() = '" . trim($name) . "'";
                            },
                            explode(",", strtolower($matchList))
                        );

                        $query .= "[" . implode(" or ", $elements) . "]";
                        $tok = "";
                        break;

                    case "link":
                        $query .= "[@href]";
                        $tok = "";
                        break;

                    case "first-line":
                    case ":first-line":
                    case "first-letter":
                    case ":first-letter":
                        // TODO
                        $el = trim($tok, ":");
                        $pseudo_elements[$el] = true;
                        break;

                    // N/A
                    case "focus":
                    case "active":
                    case "hover":
                    case "visited":
                        $query .= "[false()]";
                        $tok = "";
                        break;

                    /* Pseudo-elements */
                    case "before":
                    case ":before":
                    case "after":
                    case ":after":
                        $pos = trim($tok, ":");
                        $pseudo_elements[$pos] = true;
                        if (!$first_pass) {
                            $query .= "/*[@$pos]";
                        }

                        $tok = "";
                        break;

                    case "empty":
                        $query .= "[not(*) and not(normalize-space())]";
                        $tok = "";
                        break;

                    case "disabled":
                    case "checked":
                        $query .= "[@$tok]";
                        $tok = "";
                        break;

                    case "enabled":
                        $query .= "[not(@disabled)]";
                        $tok = "";
                        break;

                    // the selector is not handled, until we support all possible selectors force an empty set (silent failure)
                    default:
                        $query = "/../.."; // go up two levels because generated content starts on the body element
                        $tok = "";
                        break;
                }

                break;

            case "[":
                // Attribute selectors.  All with an attribute matching the following token(s)
                // https://www.w3.org/TR/selectors-3/#attribute-selectors
                $attr_delimiters = ["=", "]", "~", "|", "$", "^", "*"];
                $tok_len = strlen($tok); // bytes, to match $tok[$j] indexing
                $j = 0;

                $attr = "";
                $op = "";
                $value = "";

                // Attribute name: everything up to the first operator char or "]"
                while ($j < $tok_len) {
                    if (in_array($tok[$j], $attr_delimiters, true)) {
                        break;
                    }
                    $attr .= $tok[$j++];
                }

                // "" when the token ended without any delimiter (unterminated
                // selector); avoids reading past the end of the token.
                switch ($tok[$j] ?? "") {
                    case "~":
                    case "|":
                    case "$":
                    case "^":
                    case "*":
                        $op .= $tok[$j++];

                        if (($tok[$j] ?? "") !== "=") {
                            throw new Exception("Invalid CSS selector syntax: invalid attribute selector: $selector");
                        }

                        $op .= $tok[$j];
                        break;

                    case "=":
                        $op = "=";
                        break;
                }

                // Read the attribute value, if required
                if ($op !== "") {
                    $j++;
                    while ($j < $tok_len) {
                        if ($tok[$j] === "]") {
                            break;
                        }
                        $value .= $tok[$j++];
                    }
                }

                if ($attr === "") {
                    throw new Exception("Invalid CSS selector syntax: missing attribute name");
                }

                $value = trim($value, "\"'");

                switch ($op) {
                    case "":
                        $query .= "[@$attr]";
                        break;

                    case "=":
                        $query .= "[@$attr=\"$value\"]";
                        break;

                    case "~=":
                        // FIXME: this will break if $value contains quoted strings
                        //        (e.g. [type~="a b c" "d e f"])
                        // FIXME: Don't match anything if value contains
                        //        whitespace or is the empty string
                        $query .= "[contains(concat(' ', normalize-space(@$attr), ' '), concat(' ', '$value', ' '))]";
                        break;

                    case "|=":
                        $values = explode("-", $value);
                        $query .= "[";

                        foreach ($values as $val) {
                            $query .= "starts-with(@$attr, \"$val\") or ";
                        }

                        $query = rtrim($query, " or ") . "]";
                        break;

                    case "$=":
                        // XPath string-length() counts characters, so the
                        // suffix length must be a character count as well.
                        $query .= "[substring(@$attr, string-length(@$attr)-" . (mb_strlen($value, "UTF-8") - 1) . ")=\"$value\"]";
                        break;

                    case "^=":
                        $query .= "[starts-with(@$attr,\"$value\")]";
                        break;

                    case "*=":
                        $query .= "[contains(@$attr,\"$value\")]";
                        break;
                }

                break;
        }
    }

    // Trim the trailing '/' from the query
    if (mb_strlen($query) > 2) {
        $query = rtrim($query, "/");
    }

    return ['query' => $query, 'pseudo_elements' => $pseudo_elements];
}
/**
* https://github.com/tenderlove/nokogiri/blob/master/lib/nokogiri/css/xpath_visitor.rb
*
* @param string $expr
* @param bool $last
*
* @return string
*/
protected function _selector_an_plus_b(string $expr, bool $last = false): string
{
    // Builds the XPath 1.0 predicate body for a CSS "an+b" expression
    // (the argument of :nth-child() and friends).
    //
    // $expr  the raw an+b expression; whitespace is ignored
    // $last  count positions from the end (the :nth-last-* variants)
    //
    // Returns "false()" when the expression is not a valid an+b form or can
    // never match (positions start at 1).

    $expr = preg_replace("/\s/", "", $expr);
    if (!preg_match("/^(?P<a>[-+]?[0-9]*)?n(?P<b>[-+]?[0-9]+)?$/", $expr, $matches)) {
        return "false()";
    }

    // A missing step means 1; a bare sign means +1 / -1.
    $a_raw = $matches["a"] ?? "";
    if ($a_raw === "" || $a_raw === "+") {
        $a = 1;
    } elseif ($a_raw === "-") {
        $a = -1;
    } else {
        $a = intval($a_raw);
    }

    $b_raw = $matches["b"] ?? "";
    $b = ($b_raw === "") ? 0 : intval($b_raw);

    $position = $last ? "(last()-position()+1)" : "position()";

    // 0n+b selects exactly position b; "mod 0" would yield NaN in XPath and
    // never match.
    if ($a === 0) {
        return ($b > 0) ? "$position = $b" : "false()";
    }

    // an with a > 0 selects every a-th element; with a < 0 it only produces
    // positions <= 0, i.e. nothing.
    if ($b === 0) {
        return ($a > 0) ? "($position mod $a) = 0" : "false()";
    }

    // General case: on the correct side of b, and a whole number of steps away.
    $compare = ($a < 0) ? "<=" : ">=";
    $offset = -$b;
    if ($offset >= 0) {
        $offset = "+$offset";
    }

    return "($position $compare $b) and ((($position $offset) mod " . abs($a) . ") = 0)";
}
/**
* applies all current styles to a particular document tree
*
* apply_styles() applies all currently loaded styles to the provided
* {@link FrameTree}. Aside from parsing CSS, this is the main purpose
* of this class.
*
* @param \Dompdf\Frame\FrameTree $tree
*/
function apply_styles(FrameTree $tree)
{
// Overview of the steps below:
//  1. For every selector containing ":before"/":after", insert generated
//     content nodes into the tree for the matching elements.
//  2. For every selector, collect the matching styles per frame id, grouped
//     by specificity, in the scratch array $styles.
//  3. Walk the frame tree: add attribute-derived and inline styles, merge
//     everything in ascending specificity order (skipping styles whose media
//     queries fail), inherit from the nearest element parent and assign the
//     resulting Style to the frame.
//  4. Empty the style registry.
//
// Use XPath to select nodes. This would be easier if we could attach
// Frame objects directly to DOMNodes using the setUserData() method, but
// we can't do that just yet. Instead, we set a _node attribute_ in
// Frame->set_id() and use that as a handle on the Frame object via
// FrameTree::$_registry.
// We create a scratch array of styles indexed by frame id. Once all
// styles have been assigned, we order the cached styles by specificity
// and create a final style object to assign to the frame.
// FIXME: this is not particularly robust...
$styles = [];
$xp = new DOMXPath($tree->get_dom());
$DEBUGCSS = $this->_dompdf->getOptions()->getDebugCss();
// Add generated content
foreach ($this->_styles as $selector => $selector_styles) {
/** @var Style $style */
foreach ($selector_styles as $style) {
// Only selectors that mention :before or :after can produce generated content
if (strpos($selector, ":before") === false && strpos($selector, ":after") === false) {
continue;
}
// First pass: the query targets the host element, not the generated node
$query = $this->_css_selector_to_xpath($selector, true);
// Retrieve the nodes, limit to body for generated content
//TODO: If we use a context node can we remove the leading dot?
$nodes = @$xp->query('.' . $query["query"]);
if ($nodes === false) {
Helpers::record_warnings(E_USER_WARNING, "The CSS selector '$selector' is not valid", __FILE__, __LINE__);
continue;
}
/** @var \DOMElement $node */
foreach ($nodes as $node) {
// Only DOMElements get styles
if ($node->nodeType != XML_ELEMENT_NODE) {
continue;
}
// $pos is "before" or "after" (also "first-line"/"first-letter" if the
// selector used those pseudo-elements)
foreach (array_keys($query["pseudo_elements"], true, true) as $pos) {
// Do not add a new pseudo element if another one already matched
if ($node->hasAttribute("dompdf_{$pos}_frame_id")) {
continue;
}
$content = $style->get_specified("content");
// Do not create non-displayed before/after pseudo elements
// https://www.w3.org/TR/CSS21/generate.html#content
// https://www.w3.org/TR/CSS21/generate.html#undisplayed-counters
if ($content === "normal" || $content === "none") {
continue;
}
// Content that resolves to a URL becomes an image node, anything else
// a generic generated-content node
if (($src = $this->resolve_url($content)) !== "none") {
$new_node = $node->ownerDocument->createElement("img_generated");
$new_node->setAttribute("src", $src);
} else {
$new_node = $node->ownerDocument->createElement("dompdf_generated");
}
// Mark the node with a "before"/"after" attribute (the second-pass XPath
// query selects on it) and remember its frame id on the host element
$new_node->setAttribute($pos, $pos);
$new_frame_id = $tree->insert_node($node, $new_node, $pos);
$node->setAttribute("dompdf_{$pos}_frame_id", $new_frame_id);
}
}
}
}
// Apply all styles in stylesheet
foreach ($this->_styles as $selector => $selector_styles) {
/** @var Style $style */
foreach ($selector_styles as $style) {
$query = $this->_css_selector_to_xpath($selector);
// Retrieve the nodes
$nodes = @$xp->query($query["query"]);
if ($nodes === false) {
Helpers::record_warnings(E_USER_WARNING, "The CSS selector '$selector' is not valid", __FILE__, __LINE__);
continue;
}
$spec = $this->_specificity($selector, $style->get_origin());
foreach ($nodes as $node) {
// Retrieve the node id
// Only DOMElements get styles
if ($node->nodeType != XML_ELEMENT_NODE) {
continue;
}
$id = $node->getAttribute("frame_id");
// Assign the current style to the scratch array
$styles[$id][$spec][] = $style;
}
}
}
// Set the page width, height, and orientation based on the canvas paper size
$canvas = $this->_dompdf->getCanvas();
$paper_width = $canvas->get_width();
$paper_height = $canvas->get_height();
$paper_orientation = ($paper_width > $paper_height ? "landscape" : "portrait");
// An explicit size on the base @page style overrides the canvas dimensions
if ($this->_page_styles["base"] && is_array($this->_page_styles["base"]->size)) {
$paper_width = $this->_page_styles['base']->size[0];
$paper_height = $this->_page_styles['base']->size[1];
$paper_orientation = ($paper_width > $paper_height ? "landscape" : "portrait");
}
// Now create the styles and assign them to the appropriate frames. (We
// iterate over the tree using an implicit FrameTree iterator.)
$root_flg = false;
foreach ($tree as $frame) {
// Helpers::pre_r($frame->get_node()->nodeName . ":");
// Until the first element frame has been processed ($root_flg is only set
// further down), frames start from the base @page style instead of a fresh one
if (!$root_flg && $this->_page_styles["base"]) {
$style = $this->_page_styles["base"];
} else {
$style = $this->create_style();
}
// Find nearest DOMElement parent
// ($p ends up null when no element ancestor exists)
$p = $frame;
while ($p = $p->get_parent()) {
if ($p->get_node()->nodeType === XML_ELEMENT_NODE) {
break;
}
}
// Styles can only be applied directly to DOMElements; anonymous
// frames inherit from their parent
if ($frame->get_node()->nodeType !== XML_ELEMENT_NODE) {
$style->inherit($p ? $p->get_style() : null);
$frame->set_style($style);
continue;
}
$id = $frame->get_id();
// Handle HTML 4.0 attributes
// The translated attributes are stored under SPEC_NON_CSS, which sorts
// below every author stylesheet specificity
AttributeTranslator::translate_attributes($frame);
if (($str = $frame->get_node()->getAttribute(AttributeTranslator::$_style_attr)) !== "") {
$styles[$id][self::SPEC_NON_CSS][] = $this->_parse_properties($str);
}
// Locate any additional style attributes
if (($str = $frame->get_node()->getAttribute("style")) !== "") {
// Destroy CSS comments
$str = preg_replace("'/\*.*?\*/'si", "", $str);
// "!attr" is the synthetic selector that _specificity() scores as an inline style
$spec = $this->_specificity("!attr", self::ORIG_AUTHOR);
$styles[$id][$spec][] = $this->_parse_properties($str);
}
// Grab the applicable styles
if (isset($styles[$id])) {
/** @var array[][] $applied_styles */
$applied_styles = $styles[$id];
// Sort by specificity
// (ascending, so more specific styles are merged later)
ksort($applied_styles);
if ($DEBUGCSS) {
$debug_nodename = $frame->get_node()->nodeName;
print "<pre>\n$debug_nodename [\n";
foreach ($applied_styles as $spec => $arr) {
printf(" specificity 0x%08x\n", $spec);
/** @var Style $s */
foreach ($arr as $s) {
print " [\n";
$s->debug_print();
print " ]\n";
}
}
}
// Merge the new styles with the inherited styles
$acceptedmedia = self::$ACCEPTED_GENERIC_MEDIA_TYPES;
$acceptedmedia[] = $this->_dompdf->getOptions()->getDefaultMediaType();
foreach ($applied_styles as $arr) {
/** @var Style $s */
foreach ($arr as $s) {
$media_queries = $s->get_media_queries();
foreach ($media_queries as $media_query) {
// Each media query is a [feature, value] pair
list($media_query_feature, $media_query_value) = $media_query;
// if any of the Style's media queries fail then do not apply the style
//TODO: When the media query logic is fully developed we should not apply the Style when any of the media queries fail or are bad, per https://www.w3.org/TR/css3-mediaqueries/#error-handling
if (in_array($media_query_feature, self::$VALID_MEDIA_TYPES)) {
// NOTE(review): $media_query is the [feature, value] array, so
// in_array($media_query, $acceptedmedia) presumably never matches a list of
// media type strings, and strlen($media_query_feature) === 0 looks
// unreachable for a value found in $VALID_MEDIA_TYPES. Possibly
// $media_query_feature was intended here - confirm before changing.
// NOTE(review): outside the switch below, "continue (3)" targets the
// $applied_styles loop (skipping the rest of $arr); inside the switch,
// which counts as a loop level for continue, it targets the $arr loop.
if ((strlen($media_query_feature) === 0 && !in_array($media_query, $acceptedmedia)) || (in_array($media_query, $acceptedmedia) && $media_query_value == "not")) {
continue (3);
}
} else {
// Media features are compared against the current paper dimensions (in pt);
// a failed comparison skips the style $s
switch ($media_query_feature) {
case "height":
if ($paper_height !== (float)$style->length_in_pt($media_query_value)) {
continue (3);
}
break;
case "min-height":
if ($paper_height < (float)$style->length_in_pt($media_query_value)) {
continue (3);
}
break;
case "max-height":
if ($paper_height > (float)$style->length_in_pt($media_query_value)) {
continue (3);
}
break;
case "width":
if ($paper_width !== (float)$style->length_in_pt($media_query_value)) {
continue (3);
}
break;
case "min-width":
//if (min($paper_width, $media_query_width) === $paper_width) {
if ($paper_width < (float)$style->length_in_pt($media_query_value)) {
continue (3);
}
break;
case "max-width":
//if (max($paper_width, $media_query_width) === $paper_width) {
if ($paper_width > (float)$style->length_in_pt($media_query_value)) {
continue (3);
}
break;
case "orientation":
if ($paper_orientation !== $media_query_value) {
continue (3);
}
break;
default:
// Unknown features are reported but do not prevent the style from applying
Helpers::record_warnings(E_USER_WARNING, "Unknown media query: $media_query_feature", __FILE__, __LINE__);
break;
}
}
}
$style->merge($s);
}
}
}
// Handle inheritance
if ($p && $DEBUGCSS) {
print " inherit [\n";
$p->get_style()->debug_print();
print " ]\n";
}
$style->inherit($p ? $p->get_style() : null);
if ($DEBUGCSS) {
print " DomElementStyle [\n";
$style->debug_print();
print " ]\n";
print "]\n</pre>";
}
$style->clear_important();
$frame->set_style($style);
// First element frame processed: from here on frames get fresh styles, and
// media features are evaluated against the page's content area
if (!$root_flg && $this->_page_styles["base"]) {
$root_flg = true;
// set the page width, height, and orientation based on the parsed page style
if ($style->size !== "auto") {
list($paper_width, $paper_height) = $style->size;
}
// Subtract the margins so width/height describe the area inside them
$paper_width = $paper_width - (float)$style->length_in_pt($style->margin_left) - (float)$style->length_in_pt($style->margin_right);
$paper_height = $paper_height - (float)$style->length_in_pt($style->margin_top) - (float)$style->length_in_pt($style->margin_bottom);
$paper_orientation = ($paper_width > $paper_height ? "landscape" : "portrait");
}
}
// We're done! Clean out the registry of all styles since we
// won't be needing this later.
foreach (array_keys($this->_styles) as $key) {
$this->_styles[$key] = null;
unset($this->_styles[$key]);
}
}
/**
* parse a CSS string using a regex parser
* Called by {@link Stylesheet::load_css()} and {@link Stylesheet::load_css_file()}
*
* @param string $str
*
* @throws Exception
*/
private function _parse_css($str)
{
    // Splits a stylesheet into @rules and regular rulesets with a single
    // regular expression and dispatches each piece to the matching parser
    // (_parse_import, _parse_sections, _parse_properties, _parse_font_face).
    // Throws Exception when the regex engine reports a failure.

    $str = trim($str);

    // Destroy comments and remove HTML comments
    $css = preg_replace([
        "'/\*.*?\*/'si",
        "/^<!--/",
        "/-->$/"
    ], "", $str);

    // FIXME: handle '{' within strings, e.g. [attr="string {}"]

    // Something more legible:
    $re =
        "/\s* # Skip leading whitespace \n" .
        "( @([^\s{]+)\s*([^{;]*) (?:;|({)) )? # Match @rules followed by ';' or '{' \n" .
        "(?(1) # Only parse sub-sections if we're in an @rule... \n" .
        " (?(4) # ...and if there was a leading '{' \n" .
        " \s*( (?:(?>[^{}]+) ({)? # Parse rulesets and individual @page rules \n" .
        " (?(6) (?>[^}]*) }) \s*)+? \n" .
        " ) \n" .
        " }) # Balancing '}' \n" .
        "| # Branch to match regular rules (not preceded by '@') \n" .
        "([^{]*{[^}]*})) # Parse normal rulesets \n" .
        "/xs";

    if (preg_match_all($re, $css, $matches, PREG_SET_ORDER) === false) {
        // An error occurred
        throw new Exception("Error parsing css file: preg_match_all() failed.");
    }

    // After matching, the array indices are set as follows:
    //
    // [0] => complete text of match
    // [1] => contains '@import ...;' or '@media {' if applicable
    // [2] => text following @ for cases where [1] is set
    // [3] => media types or full text following '@import ...;'
    // [4] => '{', if present
    // [5] => rulesets within media rules
    // [6] => '{', within media rules
    // [7] => individual rules, outside of media rules
    //
    // With PREG_SET_ORDER, trailing groups that did not take part in a match
    // are left out of the array. An @rule terminated by ';' instead of a
    // '{...}' block therefore has no index 5.

    $media_query_regex = "/(?:((only|not)?\s*(" . implode("|", self::$VALID_MEDIA_TYPES) . "))|(\s*\(\s*((?:(min|max)-)?([\w\-]+))\s*(?:\:\s*(.*?)\s*)?\)))/isx";

    //Helpers::pre_r($matches);
    foreach ($matches as $match) {
        $match[2] = trim($match[2]);

        if ($match[2] !== "") {
            // Body of the @rule; empty when the rule had no '{...}' block
            $body = $match[5] ?? "";

            // Handle @rules
            switch ($match[2]) {
                case "import":
                    $this->_parse_import($match[3]);
                    break;

                case "media":
                    $acceptedmedia = self::$ACCEPTED_GENERIC_MEDIA_TYPES;
                    $acceptedmedia[] = $this->_dompdf->getOptions()->getDefaultMediaType();

                    $media_queries = preg_split("/\s*,\s*/", mb_strtolower(trim($match[3])));
                    foreach ($media_queries as $media_query) {
                        if (in_array($media_query, $acceptedmedia)) {
                            //if we have a media type match go ahead and parse the stylesheet
                            $this->_parse_sections($body);
                            break;
                        } elseif (!in_array($media_query, self::$VALID_MEDIA_TYPES)) {
                            // otherwise conditionally parse the stylesheet assuming there are parseable media queries
                            if (preg_match_all($media_query_regex, $media_query, $media_query_matches, PREG_SET_ORDER) !== false) {
                                // Collect [feature, value] pairs; they are evaluated
                                // later, when the styles are applied
                                $mq = [];
                                foreach ($media_query_matches as $media_query_match) {
                                    if (empty($media_query_match[1]) === false) {
                                        // Media type, with its optional only/not prefix as the value
                                        $media_query_feature = strtolower($media_query_match[3]);
                                        $media_query_value = strtolower($media_query_match[2]);
                                        $mq[] = [$media_query_feature, $media_query_value];
                                    } elseif (empty($media_query_match[4]) === false) {
                                        // Media feature in parentheses, e.g. (min-width: 100px)
                                        $media_query_feature = strtolower($media_query_match[5]);
                                        $media_query_value = (array_key_exists(8, $media_query_match) ? strtolower($media_query_match[8]) : null);
                                        $mq[] = [$media_query_feature, $media_query_value];
                                    }
                                }
                                $this->_parse_sections($body, $mq);
                                break;
                            }
                        }
                    }
                    break;

                case "page":
                    //This handles @page to be applied to page oriented media
                    //Note: This has a reduced syntax:
                    //@page { margin:1cm; color:blue; }
                    //Not a sequence of styles like a full.css, but only the properties
                    //of a single style, which is applied to the very first "root" frame before
                    //processing other styles of the frame.
                    //Working properties:
                    // margin (for margin around edge of paper)
                    // font-family (default font of pages)
                    // color (default text color of pages)
                    //Non working properties:
                    // border
                    // padding
                    // background-color
                    //Todo:Reason is unknown
                    //Other properties (like further font or border attributes) not tested.
                    //If a border or background color around each paper sheet is desired,
                    //assign it to the <body> tag, possibly only for the css of the correct media type.

                    $page_selector = trim($match[3]);

                    if ($page_selector === "") {
                        $key = "base";
                    } elseif (in_array($page_selector, [":left", ":right", ":odd", ":even", ":first"], true)) {
                        $key = $page_selector;
                    } else {
                        // If the page has a name, skip the style.
                        break;
                    }

                    // Store the style for later...
                    if (empty($this->_page_styles[$key])) {
                        $this->_page_styles[$key] = $this->_parse_properties($body);
                    } else {
                        $this->_page_styles[$key]->merge($this->_parse_properties($body));
                    }
                    break;

                case "font-face":
                    $this->_parse_font_face($body);
                    break;

                default:
                    // ignore everything else
                    break;
            }

            continue;
        }

        if ($match[7] !== "") {
            $this->_parse_sections($match[7]);
        }
    }
}
/**
 * Resolve the given `url()` declaration to an absolute URL.
 *
 * The `url(` function name is matched case-insensitively, so that
 * declarations such as `URL(image.png)` are resolved as well.
 *
 * @param string|null $val The declaration to resolve in the context of the stylesheet.
 * @return string The resolved URL, or `none`, if the value is `none`,
 *         invalid, or points to a non-existent local file.
 */
public function resolve_url($val): string
{
    $DEBUGCSS = $this->_dompdf->getOptions()->getDebugCss();

    if (empty($val) || $val === "none") {
        $path = "none";
    } elseif (mb_stripos($val, "url") === false) {
        // No url() present: return `none` instead of prefixing the value
        // with the base path, after which it would no longer be
        // recognizable as `none`
        $path = "none";
    } else {
        // Strip the url(...) wrapper and the optional quotes around the address
        $val = preg_replace("/url\(\s*['\"]?([^'\")]+)['\"]?\s*\)/i", "\\1", trim($val));

        // Resolve the url now in the context of the current stylesheet
        $path = Helpers::build_url(
            $this->_protocol,
            $this->_base_host,
            $this->_base_path,
            $val
        );

        // A URL that could not be built is reported as `none`
        if ($path === null) {
            $path = "none";
        }
    }

    if ($DEBUGCSS) {
        $parsed_url = Helpers::explode_url($path);
        print "<pre>[_image\n";
        print_r($parsed_url);
        print $this->_protocol . "\n" . $this->_base_path . "\n" . $path . "\n";
        print "_image]</pre>";
    }

    return $path;
}
/**
 * parse @import{} sections
 *
 * The rule is split on whitespace and commas: the first token is the
 * `url()` of the stylesheet to import, all remaining tokens are treated as
 * media types. The stylesheet is loaded when no media type is given, or
 * when at least one of them is an accepted generic media type or the
 * configured default media type.
 *
 * @param string $url the url of the imported CSS file, optionally followed
 *                    by a list of media types
 */
private function _parse_import($url)
{
    $arr = preg_split("/[\s\n,]/", $url, -1, PREG_SPLIT_NO_EMPTY);
    $url = array_shift($arr);

    // An import without any media type is unconditional
    $accept = count($arr) === 0;

    if (!$accept) {
        $acceptedmedia = self::$ACCEPTED_GENERIC_MEDIA_TYPES;
        $acceptedmedia[] = $this->_dompdf->getOptions()->getDefaultMediaType();

        // @import url media_type [media_type...]
        foreach ($arr as $type) {
            if (in_array(mb_strtolower(trim($type)), $acceptedmedia)) {
                $accept = true;
                break;
            }
        }
    }

    if (!$accept) {
        return;
    }

    // Store our current base url properties in case the new url is elsewhere
    $protocol = $this->_protocol;
    $host = $this->_base_host;
    $path = $this->_base_path;

    // Todo: do we need to replace the php or file protocol with an empty
    // protocol for local files?
    try {
        $resolved = $this->resolve_url($url);

        if ($resolved !== "none") {
            $this->load_css_file($resolved);
        }
    } finally {
        // Restore the current base url, even if loading the imported file
        // failed with an exception, so that the remainder of this
        // stylesheet is still resolved against its own location
        $this->_protocol = $protocol;
        $this->_base_host = $host;
        $this->_base_path = $path;
    }
}
/**
 * parse @font-face{} sections
 * http://www.w3.org/TR/css3-fonts/#the-font-face-rule
 *
 * Collects the sources listed in the `src` descriptor and registers the
 * first usable one with the font metrics. A source is skipped when it is a
 * `local()` source, when it declares a format other than `truetype`, or
 * when no URL could be built for it. Nothing is registered when no usable
 * source remains.
 *
 * @param string $str CSS @font-face rules
 */
private function _parse_font_face($str)
{
    $descriptors = $this->_parse_properties($str);

    // [1] => "url" or "local", [2] => address, [4] => format, if present
    $source_pattern = "/(url|local)\s*\([\"\']?([^\"\'\)]+)[\"\']?\)\s*(format\s*\([\"\']?([^\"\'\)]+)[\"\']?\))?/i";
    preg_match_all($source_pattern, $descriptors->src, $src);

    $valid_sources = [];

    foreach ($src[2] as $i => $uri) {
        $is_local = strtolower($src[1][$i]) === "local";
        $format = strtolower($src[4][$i]);
        $path = Helpers::build_url($this->_protocol, $this->_base_host, $this->_base_path, $uri);

        if ($is_local || $path === null || !in_array($format, ["", "truetype"])) {
            continue;
        }

        $valid_sources[] = [
            "local" => $is_local,
            "uri" => $uri,
            "format" => $format,
            "path" => $path,
        ];
    }

    // No valid sources
    if (empty($valid_sources)) {
        return;
    }

    $font_path = $valid_sources[0]["path"];
    $style = [
        "family" => $descriptors->get_font_family_raw(),
        "weight" => $descriptors->font_weight,
        "style" => $descriptors->font_style,
    ];

    $this->getFontMetrics()->registerFont($style, $font_path, $this->_dompdf->getHttpContext());
}
/**
 * parse regular CSS blocks
 *
 * _parse_properties() creates a new Style object based on the provided
 * CSS rules. Every declaration of the form `name: value` is set on the
 * style; a trailing `!important` (matched case-insensitively, whitespace
 * after the `!` optional) is stripped from the value and passed on as the
 * important flag. Empty declarations and declarations without a colon are
 * skipped.
 *
 * @param string $str CSS rules
 * @return Style
 */
private function _parse_properties($str)
{
    // Split the declarations on semicolons, intended to leave semicolons
    // enclosed in parentheses (e.g. within url(...)) untouched
    $properties = preg_split("/;(?=(?:[^\(]*\([^\)]*\))*(?![^\)]*\)))/", $str);
    $DEBUGCSS = $this->_dompdf->getOptions()->getDebugCss();

    if ($DEBUGCSS) {
        print '[_parse_properties';
    }

    // Create the style
    $style = new Style($this, Stylesheet::ORIG_AUTHOR);

    foreach ($properties as $prop) {
        if ($DEBUGCSS) {
            print '(';
        }

        $important = false;
        $prop = trim($prop);

        // A css property can have " ! important" appended (whitespace
        // optional); strip this off to decode the core of the property
        // correctly. Plain string functions are used instead of a regex to
        // keep the typical case fast. The keyword is compared
        // case-insensitively, as `!IMPORTANT` is valid CSS as well.
        if (mb_strtolower((string) substr($prop, -9)) === 'important') {
            $prop_tmp = rtrim(substr($prop, 0, -9));

            if (substr($prop_tmp, -1) === '!') {
                $prop = rtrim(substr($prop_tmp, 0, -1));
                $important = true;
            }
        }

        if ($prop === "") {
            if ($DEBUGCSS) {
                print 'empty)';
            }
            continue;
        }

        // The first colon separates the property name from its value
        $i = mb_strpos($prop, ":");
        if ($i === false) {
            if ($DEBUGCSS) {
                print 'novalue' . $prop . ')';
            }
            continue;
        }

        $prop_name = rtrim(mb_strtolower(mb_substr($prop, 0, $i)));
        $value = ltrim(mb_substr($prop, $i + 1));

        if ($DEBUGCSS) {
            print $prop_name . ':=' . $value . ($important ? '!IMPORTANT' : '') . ')';
        }

        $style->set_prop($prop_name, $value, $important, false);
    }

    if ($DEBUGCSS) {
        print '_parse_properties]';
    }

    return $style;
}
/**
 * parse selector + rulesets
 *
 * The input is cut at every `}`; each piece containing a `{` is treated as
 * one ruleset. Its declarations are parsed into a single Style object,
 * which is then added for every selector of the comma-separated selector
 * list in front of the `{`.
 *
 * @param string $str CSS selectors and rulesets
 * @param array $media_queries media queries to set on the parsed styles, if any
 */
private function _parse_sections($str, $media_queries = [])
{
    $DEBUGCSS = $this->_dompdf->getOptions()->getDebugCss();

    if ($DEBUGCSS) {
        print '[_parse_sections';
    }

    foreach (explode("}", $str) as $sect) {
        $brace_pos = mb_strpos($sect, "{");

        // Without an opening brace there is no ruleset in this piece
        if ($brace_pos === false) {
            continue;
        }

        if ($DEBUGCSS) {
            print '[section';
        }

        // Pre-process selectors: collapse all whitespace and strip whitespace
        // around '>', '.', ':', '+', '~', '#'
        $selector_str = preg_replace(
            ["/\s+/", "/\s+([>.:+~#])\s+/"],
            [" ", "\\1"],
            mb_substr($sect, 0, $brace_pos)
        );

        // Split the selector list on commas that are not inside parentheses
        $selectors = preg_split("/,(?![^\(]*\))/", $selector_str, 0, PREG_SPLIT_NO_EMPTY);

        $declarations = trim(mb_substr($sect, $brace_pos + 1));
        $style = $this->_parse_properties($declarations);

        // Assign it to the selected elements
        foreach ($selectors as $selector) {
            $selector = trim($selector);

            if ($selector === "") {
                if ($DEBUGCSS) {
                    print '#empty#';
                }
                continue;
            }

            if ($DEBUGCSS) {
                print '#' . $selector . '#';
            }

            //FIXME: tag the selector with a hash of the media query to separate it from non-conditional styles (?), xpath comments are probably not what we want to do here
            if (count($media_queries) > 0) {
                $style->set_media_queries($media_queries);
            }

            $this->add_style($selector, $style);
        }

        if ($DEBUGCSS) {
            print 'section]';
        }
    }

    if ($DEBUGCSS) {
        print "_parse_sections]\n";
    }
}
/**
 * Build the `file://` URL of the built-in default stylesheet.
 *
 * The stylesheet is looked up at self::DEFAULT_STYLESHEET below the real
 * path of the root directory configured in the options.
 *
 * @return string
 */
public function getDefaultStylesheet()
{
    $rootDir = realpath($this->_dompdf->getOptions()->getRootDir());
    $stylesheetPath = $rootDir . self::DEFAULT_STYLESHEET;

    return Helpers::build_url("file://", "", $rootDir, $stylesheetPath);
}
/**
 * Set the font metrics instance used by this stylesheet.
 *
 * @param FontMetrics $fontMetrics the instance to store
 * @return $this the stylesheet itself, to allow method chaining
 */
public function setFontMetrics(FontMetrics $fontMetrics)
{
    $this->fontMetrics = $fontMetrics;

    return $this;
}
/**
 * Get the font metrics instance previously stored on this stylesheet.
 *
 * @return FontMetrics
 */
public function getFontMetrics()
{
    return $this->fontMetrics;
}
/**
 * dumps the entire stylesheet as a string
 *
 * Emits one line of the form `selector => style` for every Style
 * registered under every selector of the Stylesheet. Useful for debugging.
 *
 * @return string
 */
function __toString()
{
    $dump = "";

    foreach ($this->_styles as $selector => $selector_styles) {
        $prefix = "$selector => ";

        /** @var Style $style */
        foreach ($selector_styles as $style) {
            $dump .= $prefix . $style->__toString() . "\n";
        }
    }

    return $dump;
}
}