Apparently, regular expressions do not handle strings containing multi-byte characters correctly.
The test file below performs an identity replace on some strings and demonstrates the problem. It works as expected in a browser, but fails in Prince.
Best regards,
Mikkel
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8"/>
<title>Regexp Multi-byte Character Bug</title>
<style type="text/css">
/* Left-align all text; the universal selector also covers the header cells. */
* {
  text-align: left;
}
table {
  width: 100%;
  border-collapse: collapse;
}
/* Default width for the data columns (text, expected, actual). */
td {
  width: 30%;
}
/* Status column: keep it narrow for every row, so the column width does not
   depend on whether a row passed or failed. */
td:last-child {
  width: 10%;
}
/* Colour-code the status cell; the cell text also carries "ok" / "fail". */
tr.ok td:last-child {
  background: green;
}
tr.fail td:last-child {
  background: red;
}
</style>
</head>
<body>
<h1>Regexp Multi-byte Character Bug</h1>
<table>
<thead>
<tr>
<th scope="col">text</th>
<th scope="col">expected</th>
<th scope="col">actual</th>
<th scope="col">result</th>
</tr>
</thead>
<!-- Left empty on purpose: the script appends one row per test string. -->
<tbody id="tests">
</tbody>
</table>
<script type="text/javascript">/*<![CDATA[*/
(function() {
/**
 * Creates an element in the XHTML namespace, optionally setting attributes
 * and adding a single text-node child.
 *
 * @param {string} name         Tag name of the element to create, e.g. 'td'.
 * @param {Object} [attributes] Map of attribute name to value. Only the
 *                              object's own properties are applied.
 * @param {*} [content]         Text for the child text node. No child is
 *                              added for null, undefined or the empty string;
 *                              other falsy values such as 0 are rendered.
 * @returns {Element} The new element; this function does not insert it into
 *                    the document.
 */
var createElement = function(name, attributes, content) {
    var attr, el;
    el = document.createElementNS('http://www.w3.org/1999/xhtml', name);
    if (attributes) {
        for (attr in attributes) {
            // for...in also visits inherited enumerable properties; skip them
            // so a patched Object.prototype cannot leak into the markup.
            if (Object.prototype.hasOwnProperty.call(attributes, attr)) {
                el.setAttribute(attr, attributes[attr]);
            }
        }
    }
    // Explicit checks instead of a truthiness test, so content such as 0 is
    // not silently dropped.
    if (content !== undefined && content !== null && content !== '') {
        el.appendChild(document.createTextNode(content));
    }
    return el;
};
/**
 * Records one test outcome: appends a row to the #tests table body and
 * writes the same four values, tab-separated, to the console.
 *
 * @param {string} text     The input string under test.
 * @param {string} expected The value the operation should produce.
 * @param {string} actual   The value the operation did produce.
 */
var addTestResult = function(text, expected, actual) {
    var outcome, cells, tr, k;
    // Strict comparison decides the outcome; it doubles as the row's class.
    if (expected === actual) {
        outcome = 'ok';
    } else {
        outcome = 'fail';
    }
    cells = [ text, expected, actual, outcome ];
    tr = createElement('tr', { 'class': outcome });
    // One cell per value, in column order: text, expected, actual, result.
    for (k = 0; k < cells.length; k++) {
        tr.appendChild(createElement('td', null, cells[k]));
    }
    document.getElementById('tests').appendChild(tr);
    console.log(cells.join('\t'));
};
// Working variables for the loop that runs the tests.
var i, text, expected, actual;
// Test inputs: ASCII and non-ASCII characters, with and without quotes.
var texts = [
    '"æ',
    'æ"',
    '"æøå" "x"',
    '"xyz" "x"',
    'xyzæøå',
    'åøæzyx'
];
// Run each sample through a replace that should hand it back unchanged.
i = 0;
while (i < texts.length) {
    text = texts[i];
    // Identity replace: '.' matches each character and '$&' re-inserts the match.
    actual = text.replace(new RegExp('.', 'g'), '$&');
    expected = text;
    addTestResult(text, expected, actual);
    i++;
}
})();
/*]]>*/</script>
</body>
</html>