You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
295 lines
6.7 KiB
295 lines
6.7 KiB
#!/usr/bin/perl
|
|
#
|
|
# hindent 1.1.2
|
|
#
|
|
# Properly indent HTML code and convert tags to uppercase like the Gods intended.
|
|
# Understands all nesting tags defined under the HTML 3.2 standard.
|
|
#
|
|
# by Paul Balyoz <pab@domtools.com>
|
|
#
|
|
# Usage:
|
|
# hindent [-fslcv] [-i num] [-t num] [file ...] > newfile
|
|
#
|
|
# Options:
|
|
# -f Flow - just prints tags _without_args_, for visual checking.
|
|
# NOTE: This option DAMAGES the HTML code. The output is for
|
|
# human debugging use ONLY. Keep your original file!!
|
|
# -s Strict - prints 1 tag per line with proper indenting.
|
|
# Helpful for deciphering HTML code that's all on one line.
|
|
# NOTE: This slightly DAMAGES the HTML code because it introduces
|
|
# whitespace around tags that had none before, which will mess up
|
|
# formatting somewhat on the page (links will have extra spaces, etc).
|
|
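# -i num Set indentation to this many characters (0..10, default 2).
# -t num Set the tab stop width in characters (0..12, default 8); 0 turns tabs off.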
|
|
# -l List all the tags we recognize and exit.
|
|
# -c Lowercase HTML tags. (Uppercase is default)
|
|
# -v Print version of hindent and exit.
|
|
#
|
|
# Copyright (C) 1993-1999 Paul A. Balyoz <pab@domtools.com>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
#
|
|
|
|
|
|
# Number of spaces added for each level of nesting.
# (When a line is printed, every full run of $tabstop spaces is written as
# one tab character.)
# Any value may be used; some recommendations: 8, 4, 3, or 2
$spacesperlevel = 2;

# Width, in spaces, of a "tab" on your screen.
# Unix generally uses 8-space tabs, but most editors let the user change it.
# Unless tabs are turned off (-t0), 1 tab character is output for every
# $tabstop spaces of indentation.
$tabstop = 8;

# Tags that require their own end tag <TAG>...</TAG>; these are the tags
# that get nested.  (WARNING, names must be written in lower-case here.)
# Tags that are not on this list are ignored for indenting purposes.
# Each listed name becomes a key of %nesttag with the value 1.
%nesttag = map { ($_ => 1) } qw(
    html head body title
    a
    table tr th td
    form select textarea
    ul ol dl blockquote center div
    font pre tt i b u strike big small sub sup
    em strong dfn code samp kbd var cite
    h1 h2 h3 h4 h5 h6
    applet
    map
    frameset noframes
);
# 'p' is deliberately not listed because many people use <P> but not </P>
|
|
|
|
|
|
#-------------------\
|
|
# END CONFIGURATIONS ===================================================================
|
|
#-------------------/
|
|
|
|
use Getopt::Std;
|
|
|
|
|
|
#
|
|
# Parse args
|
|
#
|
|
|
|
# Write the one-line usage summary to STDERR and terminate the program
# with exit status 1.  Takes no arguments and never returns.
sub usageexit {
    my $synopsis = "usage: hindent [-fslcv] [-i num] [-t num] [file ...] > newfile\n";
    print STDERR $synopsis;
    exit 1;
}
|
|
|
|
# Parse the command line.  getopts() stores every recognised switch in the
# package variable $opt_<letter>; -i and -t each take an argument.
# An unknown switch prints the usage summary and exits.
getopts('fsi:lvt:c') || usageexit();

# -i num: spaces per indentation level, a whole number from 0 to 10.
# The digits-only test also rejects negative and non-numeric values, which
# a purely numeric comparison would silently treat as 0.
if (defined $opt_i) {
    if ($opt_i !~ /^[0-9]+$/ || $opt_i > 10) {
        print STDERR "$0: error: indentation factor '$opt_i' not in range 0..10.\n";
        usageexit();
    }
    $spacesperlevel = $opt_i;
}

# -t num: tab stop width, a whole number from 0 to 12.
# The error message reports the -t value itself (it used to print $opt_i).
if (defined $opt_t) {
    if ($opt_t !~ /^[0-9]+$/ || $opt_t > 12) {
        print STDERR "$0: error: tab stop '$opt_t' not in range 0..12.\n";
        usageexit();
    }
    $tabstop = $opt_t;
}
|
|
|
|
|
|
#
|
|
# If -l option, just list tags and exit.
|
|
#
|
|
|
|
# -l: print a heading followed by every key of %nesttag, sorted and
# upper-cased, one per line, then exit with status 0.
if ($opt_l) {
    print "hindent recognizes these HTML tags:\n";
    foreach my $name (sort keys %nesttag) {
        # Upper-case a copy so the hash keys themselves stay untouched.
        (my $shown = $name) =~ tr/a-z/A-Z/;
        print $shown, "\n";
    }
    exit 0;
}
|
|
|
|
|
|
#
|
|
# If -v option, just print version and exit.
|
|
#
|
|
|
|
# -v: print the version banner on standard output and exit with status 0.
if ($opt_v) {
    my $banner = "hindent version 1.1.2\n";
    print $banner;
    exit 0;
}
|
|
|
|
|
|
#
|
|
# Main HTML parsing code
|
|
#
|
|
|
|
# Read the input line by line, re-emit every <...> tag with its name
# case-converted, and keep track of nesting so printout() can indent.
#
# State shared with printout() (package globals):
#   $level        indentation level of the line being built
#   $changelevel  change to $level caused by tags seen since the last printout
#   $out          text accumulated for the next output line
# @tagstack holds the names of the nesting tags that are currently open, so
# $level + $changelevel always equals the number of entries on it.
#
# Options honoured here: -f (emit tags only, without arguments or text),
# -c (lower-case tag names instead of upper-case), -s (one tag per line).

$level = 0;          # indentation level
$changelevel = 0;    # change in indentation level (delta)
$out = "";           # accumulated output string
while (<>) {
    chomp;           # chomp, not chop: the last line may have no newline
    s/^\s+//;        # remove ALL preceding whitespace, we rebuild it ourselves
    $line++;

    my $copied = 0;  # offset in $_ of the first character not yet handled
    while (/<(.*?)>/g) {
        my $inside = $1;          # save now; later matches clobber $1
        my $tagend = pos($_);     # offset just past the closing '>'
        my $tagstart = $tagend - length($inside) - 2;    # offset of the '<'

        my ($tag, $arg) = split(/\s+/, $inside, 2);
        $tag = "" unless defined $tag;    # "<>" yields no fields at all

        if (!$opt_f) {
            # copy the text between the previous tag and this one
            $out .= substr($_, $copied, $tagstart - $copied);
        }
        $copied = $tagend;

        # Convert the case of real tag names only.  Constructs starting with
        # '!' or '?' (comments, declarations, processing instructions) are
        # left alone so that e.g. the text of <!--comment--> is not altered.
        if ($tag !~ /^[!?]/) {
            if ($opt_c) {
                $tag =~ tr/A-Z/a-z/;
            } else {
                $tag =~ tr/a-z/A-Z/;
            }
        }

        # Test length, not truth, so that an argument of "0" is kept.
        if (!$opt_f && defined $arg && length $arg) {
            $out .= "<$tag $arg>";
        } else {
            $out .= "<$tag>";
        }

        # if regular tag, push it on stack; if end-tag, pop it off stack.
        # but don't do any of this if it's not a special "nesting" tag!
        if ($tag !~ m,^/,) {
            if ($nesttag{lc($tag)}) {
                push @tagstack, $tag;
                $changelevel++;    # remember how much for later
            }
        } else {
            my $name = lc(substr($tag, 1));    # end-tag name without the '/'
            if ($nesttag{$name}) {
                # Look for the innermost open tag with this name.
                my $match = $#tagstack;
                $match-- while $match >= 0 && lc($tagstack[$match]) ne $name;

                if ($match >= 0) {
                    # Close it together with every tag opened after it.
                    my $closed = @tagstack - $match;
                    splice(@tagstack, $match);
                    $changelevel -= $closed;
                } else {
                    # No such tag is open: leave the stack and the
                    # indentation alone instead of throwing open tags away.
                    print STDERR "line $line: end tag <$tag> has no matching begin tag, ignored.\n";
                }
            }
        }
        printout() if $opt_s;    # -s -> print every tag on new line
    }

    #
    # Print rest of line after the last match, and newline.
    # (not part of Flow)
    #
    if (!$opt_f) {
        $out .= substr($_, $copied);
    }

    printout();
}
|
|
|
|
# Any tags left on the stack?
# If the final indentation level is above zero, warn on STDERR and list the
# tags still on @tagstack, innermost first, each preceded by a tab.  The
# list is terminated with a newline so it does not run into whatever is
# written to the terminal next.  The stack is emptied, as before.
if ($level > 0) {
    print STDERR "WARNING: level=$level, ", scalar(@tagstack), " tags left on stack after done parsing! Specifically:\n";
    if (@tagstack) {
        print STDERR "\t", join("\t", reverse @tagstack), "\n";
        @tagstack = ();
    }
}

exit 0;
|
|
|
|
|
|
#
|
|
# Print this line of data indented properly.
|
|
#
|
|
# Print the accumulated line $out, indented for the current nesting level,
# and then reset $out to the empty string.
#
# Reads and updates the package globals $level, $changelevel and $out, and
# reads $spacesperlevel and $tabstop.  A negative $changelevel (outdent) is
# applied before the line is printed, a positive one (indent) afterwards;
# either way $changelevel is 0 when the sub returns.  Takes no arguments.
sub printout {
    # Take over the pending change; nothing below reads $changelevel.
    my $delta = $changelevel;
    $changelevel = 0;

    # To OUTdent, do that BEFORE printing.
    $level += $delta if $delta < 0;

    # Build the indentation: as many tabs as fit, the remainder as spaces.
    # A $tabstop of 0 means no tabs at all.
    $spaces = " " x ($level * $spacesperlevel);
    my $width = length($spaces);
    my $tabcount = $tabstop ? int($width / $tabstop) : 0;
    print "\t" x $tabcount, " " x ($width - $tabcount * $tabstop), "$out\n";
    $out = "";

    # To INdent, do that AFTER printing.
    $level += $delta if $delta > 0;
}
|