windows-nt/Source/XPSP1/NT/shell/lib/htmlcln.pl

#!perl

=head1 NAME

Htmlcln - Sanitizes HTML and related files before publication

=head1 SYNOPSIS

perl htmlcln.pl [-t [html|js|css]] [-DVAR[=value] ...] [-v]
-o I<outfile> I<infile>

=head1 DESCRIPTION

The Htmlcln program preprocesses text files such as HTML, JS, or CSS
files and cleans them up.

=over 8

=item *

Comments are removed.

=item *

Blank lines are removed.

=item *

Sections marked "debug" are removed in the retail build.

=back

=head1 OPTIONS

=over 8

=item B<-t> [html|js|css]

Htmlcln normally tries to guess what kind of file it is processing from
the filename extension.  You can explicitly override the guess with the
B<-t> command line switch.

=item B<-D>VAR[=value] ...

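Command line definitions are supported in the same manner as the C compiler.
The only command-line variable we pay attention to is the -DDBG flag, which
indicates that this is a debug build.  Debug sections are kept only when DBG
is defined and the FULL_DEBUG environment variable is also set; otherwise the
output is treated as a retail build.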

=item B<-o> I<outfile>

Specifies the name of the output file.

=item I<infile>

Specifies the name of the source file.

=back

=cut

use strict qw(vars refs subs);

##############################################################################
#
#   Element -  A class that parses HTML elements
#
#   Instance variables:
#
#       raw         = the raw text
#       attr        = hash of attributes
#       tag         = name of tag, including slash
#
#       If the value of an attribute hash is undef, it means that the
#       attribute is present but with no value.
#

package Element;

#
#   Constructor:  $elem = new Element("<TABLE BORDER>");
#
sub new {
    #
    #   Parse the raw text of a single tag, e.g. <TABLE BORDER WIDTH="100%">.
    #
    #       $class  = class to bless the result into
    #       $raw    = the complete tag text, including the angle brackets
    #
    #   Returns the new object.  The tag name and all attribute names are
    #   stored in upper case.  Text that does not begin with "<" produces
    #   a warning and an object with no tag and no attributes.
    #
    my ($class, $raw) = @_;
    my $attr = { };
    my $self = { raw => $raw, attr => $attr };

    if ($raw =~ s/^<([^\s>]*)//) {
        $self->{tag} = uc $1;

        # Only names that start with a letter are parsed for attributes;
        # this excludes end tags ("/TABLE") and declarations ("!DOCTYPE").
        if ($self->{tag} =~ /^[A-Z]/) {
            $raw =~ s/>$//;
            for (;;) {
                # name="value", name='value' or name=value
                if ($raw =~ s/^\s*([-A-Za-z]+)="([^"]*)"// ||
                    $raw =~ s/^\s*([-A-Za-z]+)='([^']*)'// ||
                    $raw =~ s/^\s*([-A-Za-z]+)=(\S*)//) {
                    $attr->{uc $1} = $2;
                # A bare name.  Hyphens are allowed here just as they are
                # for valued attributes, so "data-x" is not cut to "DATA".
                } elsif ($raw =~ s/^\s*([A-Za-z][-A-Za-z]*)//) {
                    $attr->{uc $1} = undef;
                } else {
                    last;                       # nothing else we recognize
                }
            }
        }
    } else {
        warn "Can't parse \"$raw\"";
    }

    bless $self, $class;

}

#
#   Element::Tag
#
#   Returns the tag.
#
sub Tag {
    # Accessor: the tag name recorded by the constructor.
    my ($elem) = @_;
    return $elem->{tag};
}

#
#   Element::Attr
#
#   Returns the value of the attribute.
#
sub Attr {
    # Look the attribute up by its upper-cased name; the result is undef
    # both for a missing attribute and for one present without a value.
    my $elem = shift;
    my $name = shift;
    return $elem->{attr}{uc $name};
}

#
#   Element::Exists
#
#   Returns the presence of the attribute.
#
sub Exists {
    # True when the attribute was seen at all, with or without a value.
    my $elem = shift;
    my $name = uc shift;
    return exists $elem->{attr}{$name};
}


##############################################################################
#
#   Filter base class
#
#   Basic stuff to save people some hassle.
#
#   Per perl tradition, an object is a ref to an anonymous hash where the
#   state is kept.
#
#   Instance variables:
#
#       sink        = reference to filter sink
#

package Filter;

sub new {
    # A filter starts life as an empty hash blessed into the given class.
    my $class = shift;
    my %state = ();
    return bless \%state, $class;
}

sub SetSink {
    # Remember the object that will receive this filter's output.
    my $self = shift;
    $self->{sink} = shift;
}

sub Add {
    # The base class does no processing: everything handed to us goes
    # straight on to the sink.
    my ($self, @text) = @_;
    return $self->{sink}->Add(@text);
}

sub Flush {
    # The base class buffers nothing, so there is nothing to flush.
    return;
}

sub Close {
    # Closing a filter just closes whatever it sinks into.
    my ($self, @args) = @_;
    my $sink = $self->{sink};
    return $sink->Close(@args);
}

sub SinkAdd {
    # Hand text to the sink.  Unlike Add, subclasses in this file do not
    # override this, so it always reaches the sink's own Add.
    my $self = shift;
    my $sink = $self->{sink};
    $sink->Add(@_);
}

##############################################################################
#
#   TokenFilter filter package
#
#   Does not modify the stream, but merely chops it into tokens, as
#   recognized by NextToken and processed by EachToken.
#
#   Instance data:
#
#       buf         = unprocessed text
#
package TokenFilter;
@TokenFilter::ISA = qw(Filter);

#
#   Append the incoming text to the buffer, then suck out entire tokens.
#
sub Add {
    # Buffer the new text, then hand every complete token to EachToken.
    # NextToken returns undef when what remains is only a fragment; the
    # fragment stays in the buffer until more text arrives.
    my $self     = shift;
    my $incoming = shift;

    $self->{buf} .= $incoming;

    for (;;) {
        last if $self->{buf} eq '';
        my $piece = $self->NextToken;
        last unless defined $piece;
        $self->EachToken($piece);
    }
}

sub Flush {
    #
    #   Push out whatever is still buffered, then flush the sink.
    #
    #   The leftover text (typically a last line with no newline, or an
    #   unterminated fragment) is handed to EachToken as one final token.
    #   The buffer is emptied first so that flushing twice cannot emit the
    #   same text twice, and nothing is emitted when nothing is buffered.
    #
    #   The flush is then passed on to the sink.  Filters are chained, and
    #   a sink that buffers lines would otherwise hold on to its own
    #   partial last line forever.
    #
    my $self = shift;

    my $rest = $self->{buf};
    if (defined($rest) && $rest ne '') {
        $self->{buf} = '';
        $self->EachToken($rest);
    }

    my $sink = $self->{sink};
    $sink->Flush() if ref($sink) && UNIVERSAL::can($sink, 'Flush');
}

#
#   By default, we just sink tokens to the next layer.
#
sub EachToken {
    # Default token handler: pass the token through unchanged.
    my $self  = shift;
    my $token = shift;
    return $self->SinkAdd($token);
}


##############################################################################
#
#   LineFilter filter package
#
#   Tokenizer that recognizes lines.
#
#   Instance data:
#
#       buf         = unprocessed text
#
package LineFilter;
@LineFilter::ISA = qw(TokenFilter);

#
#   Recognize lines.
#
sub NextToken {
    # Remove and return the first newline-terminated line in the buffer.
    # Returns undef, leaving the buffer alone, when no complete line is
    # available yet.
    my $self = shift;

    return $self->{buf} =~ s/([^\n]*\n)// ? $1 : undef;
}

##############################################################################
#
#   WhitespaceFilter filter package
#
#   Removes blank lines and removes leading and trailing whitespace.
#
#   Someday: Collapse multiple whitespace outside of quotation marks.
#
package WhitespaceFilter;
@WhitespaceFilter::ISA = qw(LineFilter);

sub EachToken {
    # Trim spaces and tabs from both ends of the line, then forward it
    # unless nothing but the line terminator is left.
    my $self = shift;
    my $text = shift;

    $text =~ s/^[ \t]+//;
    $text =~ s/[ \t]+$//;       # $ also matches just before a final newline

    return if $text =~ /^$/;    # blank line - drop it
    $self->SinkAdd($text);
}

##############################################################################
#
#   OutFile filter package
#
#   Writes its output to a file.
#
#   Instance data:
#
#       fh          = name of file handle
#
#

package OutFile;
@OutFile::ISA = qw(Filter);
no strict 'refs';           # Our filename globs aren't very strict

#
#   Custom method:  SetOutput.  Opens an output file.
#

my $seq = 0;

sub SetOutput {
    #
    #   Open $file for writing and remember the handle.
    #
    #       $file   = name of the file to create (truncated if it exists)
    #
    #   Each object gets its own symbolic handle name ("OutFile0",
    #   "OutFile1", ...), stored in $self->{fh}.  Dies if the file cannot
    #   be opened.
    #
    #   The three-argument form of open is used so that the file name is
    #   taken literally: leading or trailing whitespace is kept and
    #   characters such as ">" or "&" in the name cannot change the mode.
    #
    my($self, $file) = @_;
    $self->{fh} = "OutFile" . $seq++;
    open($self->{fh}, '>', $file) || die "Unable to open $file for writing ($!)\n";
}

sub Add {
    # Write the given text to the output file.
    my ($self, @text) = @_;
    my $fh = $self->{fh};
    print { $fh } @text;
}

sub Close {
    #
    #   Close the output file.
    #
    #   Output is buffered, so a write error (for example a full disk) may
    #   only show up when the handle is closed.  The result of close is
    #   therefore checked, and a failure is fatal rather than silently
    #   leaving a truncated output file behind.
    #
    my $self = shift;
    close($self->{fh}) || die "Unable to close output file ($!)\n";
}

##############################################################################
#
#   DebugFilter filter package
#
#   Filters out ;debug and ;begin_debug blocks if building retail.
#
#   Instance data:
#
#       skip        = nonzero if we are inside an ignored ;begin_debug block
#       buf         = unprocessed text
#

package DebugFilter;
@DebugFilter::ISA = qw(LineFilter);
no strict 'refs';           # Our filename globs aren't very strict

#
#   See if the line contains a debug marker.
#   If applicable, send the line down the chain.
#
sub EachToken {
    # Handle one line.  The markers are removed from the line as they are
    # tested, so the order of the tests below matters.
    my $self = shift;
    my $text = shift;

    my $retail = $::RetailVersion;

    # ;begin_debug opens a block, which is skipped only in a retail build.
    $self->{skip} = $retail if $text =~ s/;begin_debug//;

    if ($text =~ s/;end_debug// and $self->{skip}) {
        # End of a skipped block: stop skipping and swallow this line too.
        $self->{skip} = 0;
    }
    elsif ($text =~ s/;debug// and $retail) {
        # Single debug line in a retail build: swallow it.
    }
    elsif (not $self->{skip}) {
        # Ordinary line outside a skipped block: pass it down the chain.
        $self->SinkAdd($text);
    }
}

##############################################################################
#
#   CPP filter package
#
#   The CPP filter performs the following operations:
#
#       Removes C and C++-style comments.
#
#       Filters whitespace.
#
#   Instance data:
#
#       buf         = unprocessed text
#       wsf         = child WhitespaceFilter
#       sink        = where tokens are sent; always the child WhitespaceFilter

package CPP;
@CPP::ISA = qw(TokenFilter);

sub new {
    # Build the filter together with its private WhitespaceFilter.
    # Tokens produced by this filter are always sunk into that child.
    my $class = shift;

    my $wsf  = WhitespaceFilter->new;
    my $self = Filter->new;
    @{$self}{qw(wsf sink)} = ($wsf, $wsf);

    return bless $self, $class;
}

#
#   Recognize tokens, which are lines or /* ... */ comments.
#
sub NextToken {
    #
    #   Remove and return the next token from the buffer.
    #
    #   Returns the text to keep ("" for a removed /* */ comment, "\n" for
    #   a removed // comment), or undef when the buffer starts with a
    #   fragment that cannot be classified until more text arrives.
    #
    #   String literals are recognized so that comment markers inside them
    #   are left alone; otherwise "http://host/" would lose everything
    #   after the colon.  A literal must be closed on its own line, or be
    #   continued with a backslash.  A quote that has no partner on its
    #   line is passed through as ordinary text, so in the worst case a
    #   comment is kept; text outside comments is never removed.
    #
    my($self) = shift;

    if ($self->{buf} =~ s/^([^\/"']+)//) {      # eat up to a slash or quote
        $1;
    } elsif ($self->{buf} =~
             s/^("(?:[^"\\\n]|\\.)*"|'(?:[^'\\\n]|\\.)*')//s) {
        $1;                                     # complete string literal
    } elsif ($self->{buf} =~ /^["'][^\n]*\n/ &&
             $self->{buf} =~ s/^(["'])//) {     # unmatched quote - plain text
        $1;
    } elsif ($self->{buf} =~ /^["']/) {         # line not complete yet - stop
        undef;
    } elsif ($self->{buf} =~ s/^\/\/.*?\n//) {  # eat // to end of line
        "\n";
    } elsif ($self->{buf} =~ s/^\/\*[^\0]*?\*\///) { # eat /* .. */
        '';
    } elsif ($self->{buf} =~ s/^(\/)(?=[^\/\*])//) { # eat / not followed by / or *
        $1;
    } else {                                    # incomplete fragment - stop
        undef;
    }
}

#
#   SetSink
#
#   The sink we get is really the whitespace filter's sink, and we sink
#   into the whitespace filter.
#
sub SetSink {
    # Our own sink never changes (it is the whitespace filter); the sink
    # we are given is attached to the far side of that filter instead.
    my $self       = shift;
    my $downstream = shift;
    return $self->{wsf}->SetSink($downstream);
}

##############################################################################
#
#
#   JS - comments are // or /* ... */, invoked via <SCRIPT>...
#   CSS - comments are /* ... */, invoked via <STYLE TYPE="text/css">
#
#   They are both just CPP thingies.  Both should someday remove whitespace

package JS;
@JS::ISA = qw(CPP);

package CSS;
@CSS::ISA = qw(CPP);

##############################################################################
#
#   HTML filter package
#
#   The HTML filter performs the following operations:
#
#       Send the final output through a whitespace filter.
#
#       Remove comments.
#
#   Someday it will also...
#
#       Recognize embedded stylesheets and scripts and generate a subfilter
#       to handle them.
#
#       Compress spaces outside quotation marks.
#
#   Instance data:
#
#       buf         = unprocessed text
#       wsf         = child WhitespaceFilter
#       script      = current script sink
#       endScript   = sub that recognizes end of script
#       ultSink     = the ultimate sink

package HTML;
@HTML::ISA = qw(TokenFilter);

sub new {
    # Create the filter and the WhitespaceFilter it sinks into while it is
    # processing plain HTML.
    my ($class) = @_;

    my $self       = Filter->new;
    my $whitespace = WhitespaceFilter->new;

    $self->{wsf}  = $whitespace;
    $self->{sink} = $whitespace;

    return bless $self, $class;
}

#
#   SetSink
#
#   The sink we get is really the whitespace filter's sink, and we sink
#   into the whitespace filter.
#
sub SetSink {
    # Record the final destination (script filters are attached to it
    # later) and attach it to our whitespace filter as well.
    my $self     = shift;
    my $ultimate = shift;

    $self->{ultSink} = $ultimate;
    return $self->{wsf}->SetSink($ultimate);
}

#
#   NextHTMLToken
#
#   An HTML token is one of the following:
#
#   -   A hunk of boring text.
#   -   A comment (thrown away).
#   -   A matched <...> thingie.

sub NextHTMLToken {
    # Remove and return the next HTML token from the buffer.  Returns ''
    # for a comment (which is discarded) and undef when the buffer starts
    # with something that is not complete yet.
    my $self = shift;

    # A run of anything other than "<" is plain text.
    return $1 if $self->{buf} =~ s/^([^<]+)//;

    # A complete comment is swallowed.
    return '' if $self->{buf} =~ s/^(<!--[^\0]*?-->)//;

    # <!DOCTYPE ...> and friends begin with "<!" but are not comments.
    return $1 if $self->{buf} =~ s/^(<![^-][^>]*>)//;

    # Any other complete <...>.
    return $1 if $self->{buf} =~ s/^(<[^!][^>]*>)//;

    # Incomplete fragment - wait for more text.
    return undef;
}

#
#   NextScriptToken
#
#   A script token is anything that isn't the word </SCRIPT>.
#

sub NextScriptToken
{
    #
    #   Remove and return the next token while inside a script or style
    #   block.  A token is either an end tag (</SCRIPT> or </STYLE>) on
    #   its own, the text in front of the next end tag, or, if there is no
    #   end tag, the whole buffer.
    #
    #   The text in front of an end tag is matched with a lookahead so
    #   that the end tag stays in the buffer and comes back as the next
    #   token.  Consuming it together with the text would make the end tag
    #   vanish from the output and leave the filter in script mode.
    #
    my($self) = shift;
    if ($self->{buf} =~ s,^(</(?:SCRIPT|STYLE)>),,i) {
        $1;
    } elsif ($self->{buf} =~ s,^(.+?)(?=</(?:SCRIPT|STYLE)>),,is) {
        $1;
    } else {
        my $tok = $self->{buf};
        $self->{buf} = '';
        $tok;
    }
}

#
#   NextToken
#
#   Returns either an HTML token or a script token.
#
sub NextToken {
    # Tokenize as script text while a script filter is installed, and as
    # HTML otherwise.
    my $self = shift;
    return defined($self->{script}) ? $self->NextScriptToken()
                                    : $self->NextHTMLToken();
}

#
#   _Redirect - Private method that redirects parsing to a script language.
#
#       $self->_Redirect($scr, $end);
#
#       $scr = script object to hook in
#       $end = sub that recognizes the end of the script
#
#
sub _Redirect {
    # Switch to script mode: tokens now go to $scr, which in turn writes
    # to the ultimate sink, and $end is kept for recognizing the end of
    # the script.
    my $self = shift;
    my ($scr, $end) = @_;

    $self->{sink}   = $scr;
    $self->{script} = $scr;
    $scr->SetSink($self->{ultSink});
    $self->{endScript} = $end;
}

sub EachToken {
    #
    #   Process one token.
    #
    #   Outside a script, a <SCRIPT> or <STYLE> start tag installs a
    #   sub-filter for the embedded language; the start tag itself is the
    #   first thing sent to that sub-filter.  Inside a script, the matching
    #   end tag flushes the sub-filter and switches back to the whitespace
    #   filter, which then receives the end tag.  Every token is passed to
    #   whichever sink is current once that bookkeeping is done.
    #
    my($self, $tok) = @_;

    # Start tags only mean something outside a script; inside one, text
    # that happens to look like a tag is just script text.
    my $inside = defined($self->{script});

    if (!$inside && $tok =~ /^<SCRIPT/i) {
        $self->{inScript} = 1; # BUGBUG create a script sink
        my $elem = Element->new($tok);

        # Leave a missing LANGUAGE undefined.  Applying lc to undef gives
        # an empty string, which would hide the "no language" case below.
        my $lang = $elem->Attr("LANGUAGE");
        $lang = lc $lang if defined $lang;

        my $scr;
        # No language implies JScript
        if (!defined($lang) || $lang eq 'jscript' || $lang eq 'javascript') {
            $scr = CPP->new;
        } else {
            warn "Unknown script language [$lang]";
            # Just use the whitespace filter as the unknown script filter
            $scr = WhitespaceFilter->new;
        }
        # The recognizer is called with the token as its argument, so it
        # must test $_[0]; $_ holds whatever the caller's loop left there.
        $self->_Redirect($scr, sub { $_[0] =~ m,^</SCRIPT>,i });

    } elsif (!$inside && $tok =~ /^<STYLE/i) {
        $self->_Redirect(CSS->new, sub { $_[0] =~ m,^</STYLE>,i });

    } elsif (defined($self->{endScript}) && &{$self->{endScript}}($tok)) {
        delete $self->{endScript};
        $self->{script}->Flush();
        delete $self->{script};
        $self->{sink} = $self->{wsf};
    }
    $self->SinkAdd($tok);
}

##############################################################################
#
#   Main package
#

package main;

#
#   Set up some defaults.
#
$::RetailVersion = 1;       # retail (non-debug) output unless told otherwise
my $force_type;             # file type; undef means "guess from the name"
my $outfile;                # output file name, set by -o
my %VAR;                    # -D definitions seen so far
my $verbose;                # set by -v

##############################################################################
#
#   CreateTypeFilter - Create a filter for the specified type.
#

# Maps a lower-case file type (or file name extension) to a sub that
# creates the filter for that type.
my $types = { };
$types->{$_} = sub { HTML->new } for qw(html htm htx);  # HTML
$types->{$_} = sub { JS->new   } for qw(js jsx);        # Javascript
$types->{$_} = sub { CSS->new  } for qw(css csx);       # Cascading style sheet

sub CreateTypeFilter {
    #
    #   Create the filter object for a file type.
    #
    #       $type   = file type or extension, in any case (e.g. "HTML")
    #
    #   Returns a new filter object.  Dies with a message listing the
    #   known types when the type is missing or not recognized; calling an
    #   undefined code reference would give an error that says nothing
    #   about the actual problem.
    #
    my $type = shift;
    $type = '' unless defined $type;

    my $sub = $types->{lc $type};
    die "Unknown file type \"$type\" (known types: " .
        join(', ', sort keys %$types) . ")\n"
        unless defined $sub;

    # Call with an explicit empty argument list; a bare "&$sub;" would
    # pass our own @_ along.
    &$sub();
}

##############################################################################
#
#   Command line parsing
#

sub Usage {
    # Report the command line syntax and exit by way of die.
    my $synopsis = "Usage: htmlcln [-t [html|js|css]] [-DVAR[=value]...] [-v] -o outfile infile\n";
    die $synopsis;
}

#
#   AddDefine - Handle a -D command line option.
#
sub AddDefine {
    #
    #   Record one -D definition in %VAR.
    #
    #       $line   = the text after -D, either "NAME" or "NAME=value"
    #
    #   "NAME=value" splits at the first "=", so the value may itself
    #   contain "=".  A bare "NAME" is defined as 1.
    #
    #   Captures are used rather than the prematch and postmatch
    #   variables, whose mere appearance in a program slows down every
    #   regular expression match on older versions of perl.
    #
    my $line = shift;
    if ($line =~ /^([^=]*)=(.*)\z/s) {
        $VAR{$1} = $2;
    } else {
        $VAR{$line} = 1;
    }
}

sub ParseCommandLine {

    #
    #   Parse @ARGV and fill in the settings ($force_type, $outfile,
    #   %VAR, $verbose, $::RetailVersion).
    #
    #   Returns the name of the input file.  Calls Usage (which dies) on
    #   an unknown switch, a missing input or output file, or extra
    #   arguments.
    #

    #
    #   Scream through the command line arguments.
    #

    while ($#ARGV >= 0 && $ARGV[0] =~ /^-(.)(.*)/) {
        # $1 - command
        # $2 - optional argument

        my($cmd, $val) = ($1, $2);

        shift(@ARGV);

        # -t and -o take their value either attached ("-ofile") or as the
        # following argument ("-o file").
        if ($cmd eq 't') {
            $val = shift(@ARGV) if $val eq '';
            $force_type = $val;
        } elsif ($cmd eq 'D') {
            AddDefine($val);
        } elsif ($cmd eq 'o') {
            $val = shift(@ARGV) if $val eq '';
            $outfile = $val;
        } elsif ($cmd eq 'v') {
            $verbose = 1;
        } else {
            Usage();
        }
    }

    #
    #   What's left should be a filename, and there should be an output file.
    #

    my $infile = shift(@ARGV);
    Usage() unless defined $infile && defined $outfile && $#ARGV == -1;

    #
    #   If the filetype is not being overridden, then take it from the
    #   filename.  Only the text after the last dot of the last path
    #   component counts; taking everything after the first dot would
    #   turn "..\dir\page.htm" or "page.v2.htm" into a nonsense type.
    #
    if (!defined $force_type) {
        ($force_type) = $infile =~ /\.([^.\\\/]*)$/;
    }

    #
    #   Include debug goo only if building DBG=1 and FULL_DEBUG is set in the
    #   environment.
    #
    $::RetailVersion = 0 if defined($VAR{"DBG"}) && defined($ENV{"FULL_DEBUG"});

    $infile;
}

##############################################################################
#
#   File processing
#

sub ProcessFile {
    #
    #   Run the input file through the filter chain into the output file.
    #
    #       $infile = name of the file to read
    #
    #   The chain is DebugFilter -> type filter -> OutFile.  Dies if the
    #   input cannot be opened, the file type is unusable, or the output
    #   cannot be created.
    #
    my $infile = shift;

    #
    #   Open the input before anything else, so that a missing input file
    #   does not leave behind an empty (or freshly truncated) output file.
    #   The three-argument open takes the file name literally.
    #
    open(my $in, '<', $infile) || die "Cannot open $infile for reading ($!)\n";

    #
    #   Set up the default filter based on the file type.
    #
    my $Type = CreateTypeFilter($force_type);

    #
    #   Create the final sink.
    #
    my $sink = new OutFile;
    $sink->SetOutput($outfile);
    $Type->SetSink($sink);

    #
    #   Create the DebugFilter which sits at the top of the chain.
    #
    my $Filter = new DebugFilter;
    $Filter->SetSink($Type);

    #
    #   All the plumbing is ready - start pumping data.
    #
    #   Each line is read into $_ on purpose, exactly as before: code
    #   reached from Add may look at $_.
    #
    while (<$in>) {
        $Filter->Add($_);
    }
    $Filter->Flush();
    $Filter->Close();

    close($in);
}

##############################################################################
#
#   Main program
#

{
    # Work out what to do from the command line, then do it.
    ProcessFile(scalar ParseCommandLine());
}
Add source files 2020-09-26 03:20:57 -05:00			`#!perl`

			`=head1 NAME`

			`Htmlcln - Sanitizes HTML and related files before publication`

			`=head1 SYNOPSIS`

			`perl htmlcln.pl [-t [html\|js\|css]] [-DVAR[=value] ...] [-v]`
			`-o I<outfile> I<infile>`

			`=head1 DESCRIPTION`

			`The Htmlcln program preprocesses text files such as HTML, JS, or CSS`
			`files and cleans them up.`

			`=over 8`
			`Comments are removed.`

			`Blank lines are removed.`

			`Sections marked "debug" are removed in the retail build.`

			`=back`

			`=head1 OPTIONS`

			`=over 8`

			`=item B<-t> [html\|js\|css]`

			`Htmlcln normally tries to guess what kind of file it is processing from`
			`the filename extension. You can explicitly override the guess with the`
			`B<-t> command line switch.`

			`=item B<-D>VAR[=value] ...`

			`Command line definitions are supported in the same manner as the C compiler.`
			`The only command-line variable we pay attention to is the -DDBG flag, which`
			`indicates that this is a debug build.`

			`=item B<-o> I<outfile>`

			`Specifies the name of the output file.`

			`=item I<srcfile>`

			`Specifies the name of the source file.`

			`=back`

			`=cut`

			`use strict qw(vars refs subs);`

			`##############################################################################`
			`#`
			`# Element - A class that parses HTML elements`
			`#`
			`# Instance variables:`
			`#`
			`# raw = the raw text`
			`# attr = hash of attributes`
			`# tag = name of tag, including slash`
			`#`
			`# If the value of an attribute hash is undef, it means that the`
			`# attribute is present but with no value.`
			`#`

			`package Element;`

			`#`
			`# Constructor: $elem = new Element("<TABLE BORDER>");`
			`#`
			`sub new {`
			`my ($class, $raw) = @_;`
			`my $attr = { };`
			`my $self = { raw => $raw, attr => $attr };`
			`my $tag;`

			`if ($raw =~ s/^<([^\s>]*)//) {`
			`$self->{tag} = uc $1;`
			`if ($self->{tag} =~ /^[A-Z]/) {`
			`$raw =~ s/>$//;`
			`for (;;) {`
			`if ($raw =~ s/^\s([-A-Za-z]+)="([^"])"// \|\|`
			`$raw =~ s/^\s([-A-Za-z]+)='([^'])'// \|\|`
			`$raw =~ s/^\s([-A-Za-z]+)=(\S)//) {`
			`$attr->{uc $1} = $2;`
			`} elsif ($raw =~ s/^\s*([A-Za-z]+)//) {`
			`$attr->{uc $1} = undef;`
			`} else {`
			`last;`
			`}`
			`}`
			`}`
			`} else {`
			`warn "Can't parse \"$raw\"";`
			`}`

			`bless $self, $class;`

			`}`

			`#`
			`# Element::Tag`
			`#`
			`# Returns the tag.`
			`#`
			`sub Tag {`
			`my $self = shift;`
			`$self->{tag};`
			`}`

			`#`
			`# Element::Attr`
			`#`
			`# Returns the value of the attribute.`
			`#`
			`sub Attr {`
			`my ($self, $attr) = @_;`
			`$self->{attr}{uc $attr};`
			`}`

			`#`
			`# Element::Exists`
			`#`
			`# Returns the presence of the attribute.`
			`#`
			`sub Exists {`
			`my ($self, $attr) = @_;`
			`exists $self->{attr}{uc $attr};`
			`}`


			`##############################################################################`
			`#`
			`# Filter base class`
			`#`
			`# Basic stuff to save people some hassle.`
			`#`
			`# Per perl tradition, an object is a ref to an anonymous hash where the`
			`# state is kept.`
			`#`
			`# Instance variables:`
			`#`
			`# sink = reference to filter sink`
			`#`

			`package Filter;`

			`sub new {`
			`my($class) = @_;`
			`bless { }, $class;`
			`}`

			`sub SetSink {`
			`my ($self, $sink) = @_;`
			`$self->{sink} = $sink;`
			`}`

			`sub Add {`
			`my $self = shift;`
			`$self->{sink}->Add(@_);`
			`}`

			`sub Flush { }`

			`sub Close {`
			`my $self = shift;`
			`$self->{sink}->Close(@_);`
			`}`

			`sub SinkAdd {`
			`my $self = shift;`
			`$self->{sink}->Add(@_);`
			`}`

			`##############################################################################`
			`#`
			`# TokenFilter filter package`
			`#`
			`# Does not modify the stream, but merely chops them into tokens, as`
			`# recognized by NextToken and processed by EachToken.`
			`#`
			`# Instance data:`
			`#`
			`# buf = unprocessed text`
			`#`
			`package TokenFilter;`
			`@TokenFilter::ISA = qw(Filter);`

			`#`
			`# Append the incoming text to the buffer, then suck out entire tokens.`
			`#`
			`sub Add {`
			`my($self, $text) = @_;`
			`my $tok;`

			`$self->{buf} .= $text;`

			`while ($self->{buf} ne '' && defined($tok = $self->NextToken))`
			`{`
			`$self->EachToken($tok);`
			`}`
			`}`

			`sub Flush {`
			`my $self = shift;`
			`$self->EachToken($self->{buf});`
			`}`

			`#`
			`# By default, we just sink tokens to the next layer.`
			`#`
			`sub EachToken {`
			`my($self, $tok) = @_;`
			`$self->SinkAdd($tok);`
			`}`


			`##############################################################################`
			`#`
			`# LineFilter filter package`
			`#`
			`# Tokenizer that recognizes lines.`
			`#`
			`# Instance data:`
			`#`
			`# buf = unprocessed text`
			`#`
			`package LineFilter;`
			`@LineFilter::ISA = qw(TokenFilter);`

			`#`
			`# Recognize lines.`
			`#`
			`sub NextToken {`
			`my($self) = shift;`

			`if ($self->{buf} =~ s/([^\n]*\n)//) {`
			`$1;`
			`} else {`
			`undef;`
			`}`
			`}`

			`##############################################################################`
			`#`
			`# WhitespaceFilter filter package`
			`#`
			`# Removes blank lines and removes leading and trailing whitespace.`
			`#`
			`# Someday: Collapse multiple whitespace outside of quotation marks.`
			`#`
			`package WhitespaceFilter;`
			`@WhitespaceFilter::ISA = qw(LineFilter);`

			`sub EachToken {`
			`my($self, $line) = @_;`
			`$line =~ s/^[ \t]+//;`
			`$line =~ s/[ \t]+$//;`
			`$self->SinkAdd($line) unless $line =~ /^$/;`
			`}`

			`##############################################################################`
			`#`
			`# OutFile filter package`
			`#`
			`# Writes its output to a file.`
			`#`
			`# Instance data:`
			`#`
			`# fh = name of file handle`
			`#`
			`#`

			`package OutFile;`
			`@OutFile::ISA = qw(Filter);`
			`no strict 'refs'; # Our filename globs aren't very strict`

			`#`
			`# Custom method: SetOutput. Opens an output file.`
			`#`

			`my $seq = 0;`

			`sub SetOutput {`
			`my($self, $file) = @_;`
			`$self->{fh} = "OutFile" . $seq++;`
			`open($self->{fh}, ">$file") \|\| die "Unable to open $file for writing ($!)\n";`
			`}`

			`sub Add {`
			`my $self = shift;`
			`print { $self->{fh} } @_;`
			`}`

			`sub Close {`
			`my $self = shift;`
			`close($self->{fh});`
			`}`

			`##############################################################################`
			`#`
			`# DebugFilter filter package`
			`#`
			`# Filters out ;debug and ;begin_debug blocks if building retail.`
			`#`
			`# Instance data:`
			`#`
			`# skip = nonzero if we are inside an ignored ;begin_debug block`
			`# buf = unprocessed text`
			`#`

			`package DebugFilter;`
			`@DebugFilter::ISA = qw(LineFilter);`
			`no strict 'refs'; # Our filename globs aren't very strict`

			`#`
			`# See if the line contains a debug marker.`
			`# If applicable, send the line down the chain.`
			`#`
			`sub EachToken {`
			`my($self, $line) = @_;`

			`# ;begin_debug means start skipping if retail`
			`if ($line =~ s/;begin_debug//) {`
			`$self->{skip} = $::RetailVersion;`
			`}`

			`# If we were skipping, then ;end_debug ends skipping and we should eat it`
			`if ($line =~ s/;end_debug// && $self->{skip}) {`
			`$self->{skip} = 0;`
			`} elsif ($line =~ s/;debug// && $::RetailVersion) {`
			`# A one-shot debug line in retail - skip it`
			`} elsif (!$self->{skip}) {`
			`$self->SinkAdd($line); # send it down the chain`
			`}`
			`}`

			`##############################################################################`
			`#`
			`# CPP filter package`
			`#`
			`# The CPP filter performs the following operations:`
			`#`
			`# Removes C and C++-style comments.`
			`#`
			`# Filters whitespace.`
			`#`
			`# Instance data:`
			`#`
			`# buf = unprocessed text`
			`# wsf = child WhitespaceFilter`
			`# script = current script sink`
			`# ultSink = the ultimate sink`

			`package CPP;`
			`@CPP::ISA = qw(TokenFilter);`

			`sub new {`
			`my($class) = shift;`
			`my $self = new Filter;`
			`$self->{wsf} = new WhitespaceFilter; # sink into a whitespace filter`
			`$self->{sink} = $self->{wsf}; # initially use this script`
			`bless $self, $class;`
			`}`

			`#`
			`# Recognize tokens, which are lines or /* ... */ comments.`
			`#`
			`sub NextToken {`
			`my($self) = shift;`

			`if ($self->{buf} =~ s/^([^\/]+)//) { # eat up to a slash`
			`$1;`
			`} elsif ($self->{buf} =~ s/^\/\/.*?\n//) { # eat // to end of line`
			`"\n";`
			`} elsif ($self->{buf} =~ s/^\/\[^\0]?\\///) { # eat / .. */`
			`'';`
			`} elsif ($self->{buf} =~ s/^(\/)(?=[^\/\])//) { # eat / not followed by / or `
			`$1;`
			`} else { # incomplete fragment - stop`
			`undef;`
			`}`
			`}`

			`#`
			`# SetSink`
			`#`
			`# The sink we get is really the whitespace filter's sink, and we sink`
			`# into the whitespace filter.`
			`#`
			`sub SetSink {`
			`my ($self, $sink) = @_;`
			`$self->{wsf}->SetSink($sink);`
			`}`

			`##############################################################################`
			`#`
			`#`
			`# JS - comments are // or /* ... */, invoked via <SCRIPT>...`
			`# CSS - comments are /* ... */, invoked via <STYLE TYPE="text/css">`
			`#`
			`# They are both just CPP thingies. Both should someday remove whitespace`

			`package JS;`
			`@JS::ISA = qw(CPP);`

			`package CSS;`
			`@CSS::ISA = qw(CPP);`

			`##############################################################################`
			`#`
			`# HTML filter package`
			`#`
			`# The HTML filter performs the following operations:`
			`#`
			`# Send the final output through a whitespace filter.`
			`#`
			`# Remove comments.`
			`#`
			`# Someday it will also...`
			`#`
			`# Recognize embedded stylesheets and scripts and generate a subfilter`
			`# to handle them.`
			`#`
			`# Compress spaces outside quotation marks.`
			`#`
			`# Instance data:`
			`#`
			`# buf = unprocessed text`
			`# wsf = child WhitespaceFilter`
			`# script = current script sink`
			`# endScript = sub that recognizes end of script`
			`# ultSink = the ultimate sink`

			`package HTML;`
			`@HTML::ISA = qw(TokenFilter);`

			`sub new {`
			`my($class) = shift;`
			`my $self = new Filter;`
			`$self->{wsf} = new WhitespaceFilter;`
			`$self->{sink} = $self->{wsf}; # initially use this script`
			`bless $self, $class;`
			`}`

			`#`
			`# SetSink`
			`#`
			`# The sink we get is really the whitespace filter's sink, and we sink`
			`# into the whitespace filter.`
			`#`
			`sub SetSink {`
			`my ($self, $sink) = @_;`
			`$self->{ultSink} = $sink;`
			`$self->{wsf}->SetSink($sink);`
			`}`

			`#`
			`# NextHTMLToken`
			`#`
			`# An HTML token is one of the following:`
			`#`
			`# - A hunk of boring text.`
			`# - A comment (thrown away).`
			`# - A matched <...> thingie.`

			`sub NextHTMLToken {`
			`my($self) = shift;`

			`#`
			`# Any string of non "<" counts as a boring text token.`
			`#`
			`# Be careful not to mistake <!DOCTYPE...> as a comment.`
			`#`
			`if ($self->{buf} =~ s/^([^<]+)//) {`
			`$1;`
			`} elsif ($self->{buf} =~ s/^(<!--[^\0]*?-->)//) { # Eat full comments`
			`'';`
			`} elsif ($self->{buf} =~ s/^(<![^-][^>]*>)//) { # <!DOCTYPE ...>`
			`$1;`
			`} elsif ($self->{buf} =~ s/^(<[^!][^>]*>)//) { # <something else>`
			`$1;`
			`} else { # incomplete fragment - stop`
			`undef;`
			`}`
			`}`

			`#`
			`# NextScriptToken`
			`#`
			`# A script token is anything that isn't the word </SCRIPT>.`
			`#`

			`sub NextScriptToken`
			`{`
			`my($self) = shift;`
			`if ($self->{buf} =~ s,^(</SCRIPT>),,i) {`
			`$1;`
			`} elsif ($self->{buf} =~ s,^(.*?)</SCRIPT>,,i) {`
			`$1;`
			`} else {`
			`my $tok = $self->{buf};`
			`$self->{buf} = '';`
			`$tok;`
			`}`
			`}`

			`#`
			`# NextToken`
			`#`
			`# Returns either an HTML token or a script token.`
			`#`
			`sub NextToken {`
			`my($self) = shift;`
			`if (defined $self->{script}) {`
			`$self->NextScriptToken();`
			`} else {`
			`$self->NextHTMLToken();`
			`}`
			`}`

			`#`
			`# _Redirect - Private method that redirects parsing to a script language.`
			`#`
			`# $self->_Redirect($scr, $end);`
			`#`
			`# $scr = script object to hook in`
			`# $end = sub that recognizes the end of the script`
			`#`
			`#`
			`sub _Redirect {`
			`my ($self, $scr, $end) = @_;`
			`$self->{script} = $self->{sink} = $scr;`
			`$scr->SetSink($self->{ultSink});`
			`$self->{endScript} = $end;`
			`}`

			`sub EachToken {`
			`my($self, $tok) = @_;`

			`if ($tok =~ /^<SCRIPT/i) {`
			`$self->{inScript} = 1; # BUGBUG create a script sink`
			`my $elem = new Element($tok);`
			`my $lang = lc $elem->Attr("LANGUAGE");`
			`my $scr;`
			`# No language implies JScript`
			`if (!defined($lang) \|\| $lang eq 'jscript' \|\| $lang eq 'javascript') {`
			`$scr = new CPP;`
			`} else {`
			`warn "Unknown script language [$lang]";`
			`# Just use the whitespace filter as the unknown script filter`
			`$scr = new WhitespaceFilter;`
			`}`
			`$self->_Redirect($scr, sub { m,^</SCRIPT>,i });`

			`} elsif ($tok =~ /<STYLE/i) {`
			`$self->_Redirect(new CSS, sub { m,^</STYLE>,i });`

			`} elsif (defined($self->{endScript}) && &{$self->{endScript}}($tok)) {`
			`delete $self->{endScript};`
			`$self->{script}->Flush();`
			`delete $self->{script};`
			`$self->{sink} = $self->{wsf};`
			`}`
			`$self->SinkAdd($tok);`
			`}`

			`##############################################################################`
			`#`
			`# Main package`
			`#`

			`package main;`

			`#`
			`# Set up some defaults.`
			`#`
			`my $force_type = undef; # do not force file type`
			`$::RetailVersion = 1; # not the debugging version`
			`my $outfile = undef; # output file not known yet`
			`my %VAR = (); # No variables defined yet`
			`my $verbose = undef; # not verbose mode`

			`##############################################################################`
			`#`
			`# CreateTypeFilter - Create a filter for the specified type.`
			`#`

			`my $types = {`
			`html => sub { new HTML }, # HTML`
			`htm => sub { new HTML },`
			`htx => sub { new HTML },`
			`js => sub { new JS }, # Javascript`
			`jsx => sub { new JS },`
			`css => sub { new CSS }, # Cascading style sheet`
			`csx => sub { new CSS },`
			`};`

			`sub CreateTypeFilter {`
			`my $sub = $types->{lc shift};`
			`&$sub;`
			`}`

			`##############################################################################`
			`#`
			`# Command line parsing`
			`#`

			`sub Usage {`
			`die "Usage: htmlcln [-t [html\|js\|css]] [-DVAR[=value]...] [-v] -o outfile infile\n";`
			`}`

			`#`
			`# AddDefine - Handle a -D command line option.`
			`#`
			`sub AddDefine {`
			`my $line = shift;`
			`if ($line =~ /=/) {`
			$VAR{$`} = $';
			`} else {`
			`$VAR{$line} = 1;`
			`}`
			`}`

			`sub ParseCommandLine {`

			`#`
			`# Scream through the command line arguments.`
			`#`

			`while ($#ARGV >= 0 && $ARGV[0] =~ /^-(.)(.*)/) {`
			`# $1 - command`
			`# $2 - optional argument`

			`my($cmd, $val) = ($1, $2);`

			`shift(@ARGV);`

			`if ($cmd eq 't') {`
			`$val = shift(@ARGV) if $val eq '';`
			`$force_type = $val;`
			`} elsif ($cmd eq 'D') {`
			`AddDefine($val);`
			`} elsif ($cmd eq 'o') {`
			`$val = shift(@ARGV) if $val eq '';`
			`$outfile = $val;`
			`} elsif ($cmd eq 'v') {`
			`$verbose = 1;`
			`} else {`
			`Usage();`
			`}`
			`}`

			`#`
			`# What's left should be a filename, and there should be an output file.`
			`#`

			`my $infile = shift(@ARGV);`
			`Usage() unless defined $infile && defined $outfile && $#ARGV == -1;`

			`#`
			`# If the filetype is not being overridden, then take it from the filename.`
			`#`
			`if (!defined $force_type) {`
			`($force_type) = $infile =~ /\.(.*)/;`
			`}`

			`#`
			`# Include debug goo only if building DBG=1 and FULL_DEBUG is set in the`
			`# environment.`
			`#`
			`$::RetailVersion = 0 if defined($VAR{"DBG"}) && defined($ENV{"FULL_DEBUG"});`

			`$infile;`
			`}`

			`##############################################################################`
			`#`
			`# File processing`
			`#`

			`sub ProcessFile {`
			`my $infile = shift;`

			`#`
			`# Create the final sink.`
			`#`
			`my $sink = new OutFile;`
			`$sink->SetOutput($outfile);`

			`#`
			`# Set up the default filter based on the file type.`
			`#`
			`my $Type = CreateTypeFilter($force_type);`
			`$Type->SetSink($sink);`

			`#`
			`# Create the DebugFilter which sits at the top of the chain.`
			`#`
			`my $Filter = new DebugFilter;`
			`$Filter->SetSink($Type);`

			`#`
			`# All the plumbing is ready - start pumping data.`
			`#`
			`open(I, $infile) \|\| die "Cannot open $infile for reading ($!)\n";`

			`while (<I>) {`
			`$Filter->Add($_);`
			`}`
			`$Filter->Flush();`
			`$Filter->Close();`
			`}`

			`##############################################################################`
			`#`
			`# Main program`
			`#`

			`{`
			`my $infile = ParseCommandLine();`
			`ProcessFile($infile);`
			`}`