This is still only a partial solution to page splitting: it does not yet handle cross-document links, though I may build it into something that can apply them. I'm posting what I have so far, in case anyone finds it useful.
Usage: ./jcv-chapters.sh document.html
jcv-chapters.js/*** @file jcv-chapters.js* @brief document splitting for princexml* @author sfinktah* @version 0.03* @date 2014-01-19*//*** @brief copy_properties* same as $.extend or _.extend, etc.* @param b target object* @param c source object** @return b*/function copy_properties(b, c) {/**/b = b || {}; c = c || {};/**/for (var a in c) b[a] = c[a];/**/if (c.hasOwnProperty && c.hasOwnProperty("toString")/**//**/&& typeof c.toString != "undefined" && b.toString !== c.toString)/**/b.toString = c.toString; return b;}/*** @brief copy_properties sfinktah pagesplitting extension*/sfinktah = copy_properties(window.sfinktah, {/**/chapters: [],/**/lastChapter: -1,/**/princeComplete: function() {/**//**/// TODO: Prince.pageCount may not not be the last page number/**//**/// TODO: Strange results occur when source has non-contiguous pg#s/**//**/sfinktah.chapters[sfinktah.lastChapter].end = Prince.pageCount;/**//**/if (window.Log && Log.data) {/**//**//**/// This is the output the shell script will search for/**//**//**/Log.data('@@CHAPTERS', 'chapters=( '/**//**//**/+ sfinktah.chapters.map(function(v) {/**//**//**//**/return '"' + v.start + '..' + v.end + '"'/**//**//**/}).join(' ') + ')');/**//**/}/**/},/**/newChapter: function(page, chapter, name) {/**//**/sfinktah.lastChapter = chapter;/**//**/console.log('chapter ' + chapter + ' page ' + page + ' ' + name);/**//**/sfinktah.chapters[chapter] = { start: page, end: -1 };/**//**/if (sfinktah.chapters[chapter - 1]) {/**//**//**/sfinktah.chapters[chapter - 1].end = page - 1;/**//**/} // TODO: have observed instances of -1 in results/**//**/return '';/**/}});// Now to define our exportsif (window.Prince) {/**/Prince.addScriptFunc("newChapter", function(page, a, b) {/**//**/return sfinktah.newChapter(page, a, b);/**/});/**/Prince.addEventListener("complete", sfinktah.princeComplete, false);}// vim: set ts=3 sts=160 sw=3 cc=76 noet:jcv-chapters.cssbody > :first-child {
/* We need to reset this counter each time around, don't we? */
counter-reset: chapter-count;
}
/* Matches every <div> that is a direct child of <body>, except the first
   <div> among its siblings, and reports a chapter boundary from its
   ::before pseudo-element. */
body > div:not(:first-of-type)::before {
/* This just captures oddities such as front matter */
/* You'll need another one of these to delineate your split points */
/* Advance the chapter number first, so the value passed below is the new one */
counter-increment: chapter-count;
/* Calls the newChapter script function with the current page number, the
   chapter number and the fixed label 'frontmatter' */
content: prince-script(newChapter, counter(page), counter(chapter-count), 'frontmatter');
}
/* Create the chapter splitting data */
/* Matches h1.heading1.dotlevel1 inside div.f, inside an li[data-node] that
   is a direct child of ul[data-node="root"], and reports a chapter boundary
   from its ::after pseudo-element. */
ul[data-node="root"] > li[data-node] > div.f > h1.heading1.dotlevel1::after {
/* Advance the chapter number first, so the value passed below is the new one */
counter-increment: chapter-count;
/* Calls the newChapter script function with the current page number, the
   chapter number and the named string h1-title.
   NOTE(review): no `string-set: h1-title ...` declaration is visible in this
   stylesheet - presumably it is set elsewhere; confirm. */
content: prince-script(newChapter, counter(page), counter(chapter-count), string(h1-title));
}
jcv-chapters.sh
#!/usr/bin/env bash
# vim: set ts=3 sts=48 sw=3 cc=76 et fdm=marker: # **** IGNORE ******
#######################################
# Compress a list of integers into runs of consecutive ascending values.
# Ported from http://stackoverflow.com/a/2270987/912236
# Globals:
#   RANGE (written) - one token per run, each preceded by a single space:
#                     "N" for a lone value, "START-END" for a run.
#                     Empty when called without arguments.
# Arguments:
#   The integers, in the order they should be scanned.
# Example:
#   get_range 1 2 3 5   ->   RANGE=" 1-3 5"
#######################################
get_range() {
  RANGE=
  local rstart rend i n=$# arr=( "$@" )
  for (( i = 0; i < n; i++ )); do
    rstart=$(( arr[i] ))
    rend=$rstart
    # Extend the run while the next element is exactly one greater. The
    # bounds check matters: an unset element evaluates to 0 in arithmetic,
    # so without it a trailing -1 would be reported as the run "-1-0".
    while (( i + 1 < n && arr[i + 1] - arr[i] == 1 )); do
      i=$(( i + 1 ))
      rend=$(( arr[i] ))
    done
    if (( rstart == rend )); then
      RANGE+=" $rstart"
    else
      RANGE+=" $rstart-$rend"
    fi
  done
}
INPUT=${1:-}                    # Take input from command line
# INPUT=jcv_vsm.html
if [[ -z $INPUT ]]; then
  echo "Usage: ${0##*/} document.html" >&2
  exit 2
fi
BASE=${INPUT%.html}
OUTPUT="$BASE.pdf"

# Take the cover/copyright notice from the
# final output document, and insert it before
# each volume.
#
# You can specify other pages from other sources
# here too.
# eg: INSERTS=( "cover-a4.pdf" "1" )
INSERTS=( "$OUTPUT" "1-2" )

# Join the chapters up-to a maximum of MAX_SECTION_PAGE_COUNT
MAX_SECTION_PAGE_COUNT=$(( 738 ))

# Render the whole document once. prince's verbose log is copied to our
# stderr by tee, while only the @@CHAPTERS line emitted by
# jcv-chapters.js is captured.
SAMPLE_PRINCE_OUTPUT=$(
  http_proxy= \
  prince \
    --script jcv-chapters.js \
    -s jcv-chapters.css \
    "$INPUT" \
    -vo "$OUTPUT" \
    2>&1 \
  | tee /dev/stderr \
  | grep 'prince: @@CHAPTERS'
)

# Extract the chapter information from the output
# string. Each chapter is logged as "START..END"; pick those tokens out
# with a regex instead of evaluating prince's output as shell syntax.
PRINCE_CHAPTER_LIST=()
remaining=${SAMPLE_PRINCE_OUTPUT##*@@CHAPTERS: chapters=}
chapter_re='"(-?[0-9]+)\.\.(-?[0-9]+)"'
while [[ $remaining =~ $chapter_re ]]; do
  PRINCE_CHAPTER_LIST+=( "${BASH_REMATCH[1]}..${BASH_REMATCH[2]}" )
  remaining=${remaining#*"${BASH_REMATCH[0]}"}
done
if (( ${#PRINCE_CHAPTER_LIST[@]} == 0 )); then
  echo "No chapter data found in prince output; nothing to split." >&2
  exit 1
fi

#######################################
# Write the pages collected so far to "$BASE-part<N>.pdf".
# Globals:   VOLUME_NUMBER, SECTION_PAGE_COUNT, SECTION_PAGE_LIST, BASE (read)
# Returns:   pdfjam's exit status
#######################################
make_part() {
  local part
  printf -v part %02d "$VOLUME_NUMBER"   # zero padded, for the message
  echo "Making part $part with $SECTION_PAGE_COUNT pages"
  pdfjam "${SECTION_PAGE_LIST[@]}" --outfile "$BASE-part$VOLUME_NUMBER.pdf"
}

# Now we're going to expand out each chapter
# into individual pages, then copy those pages
# to a numbered PDF. Yes, we could have
# just used a range, but this way is cooler.
# e.g. we can easily count how many pages there
# are in a group, and do things with odd/even
# CHAPTERS, or factors of 4 or 8 (for pageup's)

# Iterate through all the CHAPTERS, and join
# them into volumes.
VOLUME_NUMBER=1
SECTION_PAGE_COUNT=0
# pdfjam arguments for the current volume: alternating FILE and page
# selection. An array keeps paths containing spaces intact.
SECTION_PAGE_LIST=( "${INSERTS[@]}" )

for chapter in "${PRINCE_CHAPTER_LIST[@]}"; do
  first_page=${chapter%%..*}
  last_page=${chapter##*..}

  # The script side is known to report -1 occasionally; such a range
  # cannot be turned into a page selection, so skip it loudly.
  if (( first_page < 1 || last_page < first_page )); then
    echo "Skipping chapter with unusable page range: $chapter" >&2
    continue
  fi

  # Expand the chapter range into page numbers
  CHAPTER_PAGE_LIST=()
  for (( page = first_page; page <= last_page; page++ )); do
    CHAPTER_PAGE_LIST+=( "$page" )
  done

  # Work out total page count
  CHAPTER_PAGE_COUNT=${#CHAPTER_PAGE_LIST[@]}

  # Compress list of pages into a range (don't laugh)
  get_range "${CHAPTER_PAGE_LIST[@]}"    # put into $RANGE
  # RANGE is " A-B[ C-D...]"; pdfjam wants one comma separated selection.
  page_spec=${RANGE# }
  page_spec=${page_spec// /,}

  # have we exceed the maximum page length? if so, output
  # everything until now. (Never flush a volume that has no chapter yet.)
  if (( SECTION_PAGE_COUNT > 0 &&
        CHAPTER_PAGE_COUNT + SECTION_PAGE_COUNT >= MAX_SECTION_PAGE_COUNT )); then
    make_part || { echo "pdfjam failed for part $VOLUME_NUMBER" >&2; exit 1; }
    VOLUME_NUMBER=$(( VOLUME_NUMBER + 1 ))
    SECTION_PAGE_COUNT=$CHAPTER_PAGE_COUNT
    SECTION_PAGE_LIST=( "${INSERTS[@]}" "$OUTPUT" "$page_spec" )
  else
    SECTION_PAGE_LIST+=( "$OUTPUT" "$page_spec" )
    SECTION_PAGE_COUNT=$(( SECTION_PAGE_COUNT + CHAPTER_PAGE_COUNT ))
  fi
done

# We have to deal with the pages left over yet!
make_part || { echo "pdfjam failed for part $VOLUME_NUMBER" >&2; exit 1; }
echo Done.