diff options
author | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2011-01-09 23:34:42 +0100 |
---|---|---|
committer | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2011-01-09 23:34:42 +0100 |
commit | f3df65ce34153357d28bee621bdf49b61e68b182 (patch) | |
tree | 25ac416b157b010ca2f942dac5c5ba0b38a924ac /purify_html | |
parent | 09184ae09c44c052a207aa5c6dc8ce9cf61a343f (diff) | |
parent | 3bbe62327128ce85829a4cb2fb429bd8f21b4d75 (diff) | |
download | web-f3df65ce34153357d28bee621bdf49b61e68b182.tar.gz web-f3df65ce34153357d28bee621bdf49b61e68b182.tar.bz2 web-f3df65ce34153357d28bee621bdf49b61e68b182.zip |
Merge branch 'master' of flubber:~hurd-web/hurd-web
Diffstat (limited to 'purify_html')
-rwxr-xr-x | purify_html | 42 |
1 files changed, 33 insertions, 9 deletions
```diff
diff --git a/purify_html b/purify_html
index 4cf582af..9c3a7862 100755
--- a/purify_html
+++ b/purify_html
@@ -1,15 +1,39 @@
 #!/bin/sh
 
-# Mangle the rendered files to cause fewer differernces upon re-rendering.
+# Mangle the rendered files to cause fewer differences after re-rendering.
 
-# Written by Thomas Schwinge <tschwinge@gnu.org>.
+# Written by Thomas Schwinge <thomas@schwinge.name>.
 
 # Un-mangle mailto links: convert HTML character entities to real characters.
 find ./ -name \*.html -print0 \
-  | xargs -0 \
-    perl -p -i -l -e \
-      'BEGIN { $replacing = 0; }
-       # The replacing-toggling logic is a bit rough, but so is life.
-       $replacing = 1 if /<a href="mailto:/;
-       s%\&#(x?)([^;]*);%chr(length($1) ? hex($2) : $2)%eg if $replacing;
-       $replacing = 0 if /<\/a>/;'
+  | xargs -0 --no-run-if-empty -n 1 \
+    perl -e \
+      'BEGIN {
+         $file = $ARGV[0];
+         $discard = 1;
+         $replacing = 0;
+
+         # TODO: could use a proper temporary file.
+         open(OUT, ">$file.new") or die "open: $file: $!";
+         select(OUT) or die "select: $file: $!";
+       }
+
+       while (<>) {
+         # The replacing-toggling logic is a bit rough, but so is life.
+         $replacing = 1 if /<a href="mailto:/;
+         s%\&#(x?)([^;]*);%$discard = 0; chr(length($1) ? hex($2) : $2);%eg if $replacing;
+         $replacing = 0 if /<\/a>/;
+       } continue {
+         print or die "print: $file: $!";
+       }
+
+       END {
+         if ($discard) {
+           unlink("$file.new") or die "unlink: $file: $!";
+         } else {
+           rename("$file.new", $file) or die "rename: $file: $!";
+         }
+       }'
+
+# Compared to using ``perl -p -i -l'', this solution maintains the files'
+# original timestamps unless they're actually modified.
```