#! /usr/bin/env perl

# Attributes collected from "A" input lines, keyed by attribute name.
# The main loop prints them with the next start tag and then empties the
# hash; an end tag empties it as well.
%attr = ();

# Escape a string so it can be written as XML character data or inside a
# double-quoted attribute value.
#
# The characters <, >, & and ", the control characters 0-8 and 11-31, and
# the bytes 127-255 are replaced by decimal numeric character references
# (&#NN;).  Tab (9) and newline (10) are passed through unchanged.
#
# The double quote is escaped because attribute values are emitted between
# double quotes; an unescaped quote inside a value would end the attribute
# early and produce malformed XML.  In character data &#34; is equivalent
# to a literal quote, so escaping it there is harmless.
#
# Takes the raw string as its only argument and returns the escaped copy.
sub encode {
    my $txt = shift;
    $txt =~ s/([<>&"\000-\010\013-\037\177-\377])/'&#'.ord($1).';'/ge;
    return $txt;
}

# Undo the backslash escapes found in the input data: every occurrence of
# \\, \n or \ followed by three octal digits is replaced by the character
# that decodeChar() returns for it.  Any other text is left as it is.
#
# Takes the escaped string and returns the decoded copy.
sub decode {
    my ($escaped) = @_;
    (my $plain = $escaped) =~ s{\\(\\|n|[0-7][0-7][0-7])}{decodeChar($1)}ge;
    return $plain;
}

# Map the body of one escape sequence (the part after the leading
# backslash) to the character it stands for:
#   a backslash  -> a literal backslash
#   the letter n -> a newline
#   anything else is read as an octal number and turned into that character
sub decodeChar {
    my ($code) = @_;
    return '\\' if $code eq '\\';
    return "\n" if $code eq 'n';
    return chr(oct($code));
}

# Element names that are written in empty-element form: the main loop
# closes their start tag with "/>" and prints no separate end tag for them.
%empty = map { $_ => 1 } qw(
    AUTHOR.EXTERN
    META
    BIBTEX.EXTERN
    MATH.EXTERN
    PICTURE.EXTERN
    CODE.EXTERN
    PTR.EXTERN
);

# Text nodes consisting only of whitespace are dropped when they occur
# immediately after a <P> or <P.SILENT> start tag, or immediately before
# a </P> or </P.SILENT> end tag.
#
# $ignorespaces handles the first case: it is set to 1 when a P or
# P.SILENT start tag is seen, and set back to 0 by end tags, text nodes
# and processing instructions.  While it is set, a whitespace-only text
# node is discarded instead of being held for output.

my $ignorespaces=0;

# Holding area for an already-encoded, whitespace-only text node.  Such a
# node is not printed right away; the main loop either discards it (when a
# </P> or </P.SILENT> end tag follows) or has flush() print it.
my $toilet = '';

# Print the held whitespace, if there is any, and empty the holding area.
sub flush {
    return unless $toilet;
    print $toilet;
    $toilet = '';
}

# Main loop: read nsgmls output one line at a time (from the files named
# on the command line, or STDIN) and write the corresponding XML to STDOUT.
#
# The first character of each line selects the action:
#   (NAME   start tag; printed together with the attributes collected so far
#   )NAME   end tag; not printed for names listed in %empty
#   A...    attribute of the next start tag (IMPLIED attributes are dropped)
#   -TEXT   character data
#   ?TEXT   processing instruction
#   s N C   ignored
# Any other line aborts the run with an error.
while (my $line = <>) {
    # Strip the line terminator once, up front.  chomp only removes a
    # trailing newline, so a final line without one keeps its last data
    # character (chop would have deleted it).
    chomp $line;

    if ($line =~ /^\((.+)$/) {
        my $tag = $1;
        flush();
        # Whitespace is ignored only when it immediately follows <P> or
        # <P.SILENT>; any other start tag in between cancels that.
        $ignorespaces = ($tag eq 'P' || $tag eq 'P.SILENT') ? 1 : 0;
        print "<$tag";
        # Sorted so that the attribute order is the same on every run;
        # plain hash iteration order is not stable between runs.
        for my $key (sort keys %attr) {
            my $val = encode($attr{$key});
            print " $key=\"$val\"";
        }
        %attr = ();
        print $empty{$tag} ? "/>" : ">";
    }
    elsif ($line =~ /^\)(.+)$/) {
        my $tag = $1;
        # Whitespace held back just before </P> or </P.SILENT> is thrown
        # away; before any other end tag it is printed.
        if ($tag eq 'P' || $tag eq 'P.SILENT') {
            $toilet = '';
        }
        else {
            flush();
        }
        $ignorespaces = 0;
        print "</$tag>" unless $empty{$tag};
        %attr = ();
    }
    elsif ($line =~ /^A\S+ IMPLIED/) {
        # Attribute without a value: nothing to emit.
    }
    elsif ($line =~ /^A(\S+) \S+ (.*)\z/s) {
        # "A<name> <type> <value>": the type is skipped, the rest of the
        # line is the (escaped) value.
        my ($key, $raw) = ($1, $2);
        $attr{$key} = decode($raw);
    }
    elsif ($line =~ /^-/) {
        flush();
        my $txt = decode(substr($line, 1));
        if ($txt =~ /^\s*$/) {
            # Whitespace-only node: hold it back until we know what
            # follows, or drop it right after <P> / <P.SILENT>.
            $toilet = encode($txt) unless $ignorespaces;
        }
        else {
            print encode($txt);
        }
        $ignorespaces = 0;
    }
    elsif ($line =~ /^\?/) {
        flush();
        $ignorespaces = 0;
        print "<?", decode(substr($line, 1)), "?>";
    }
    elsif ($line =~ /^[sNC]/) {
        # Line types that carry nothing we need to output.
    }
    else {
        die "unexpected output from nsgmls: $line\n";
    }
}

