#!/usr/local/bin/perl -w
#Strip Base Tags v1.0
#
#This script strips html base tags from documents, saving them at the end
#of the document in a comment so that they can be rematched if necessary.
#
#There can be any number of href or src's. The script actually takes
#anything between "" and removes all but the rightmost element, where
#elements are delimited by '/'. Not perfect.
#
#Bugs: Only works on URLs within double quotes. Can inadvertantly chop off
#things which are not urls. This happens to things which satisfy the
#following conditions:
#1. inside '<' and '>'
#2. after an odd number of '"'
#3. containing a '/'
#
#Usage: stripbasetags-1.0 infile [outfile]
#outfile defaults to infile.bak
#
#By Satya .
#Use at own risk. Freeware. Please keep these comments intact. Inform me
#of changes and redistribution. Give me credit if used anywhere.
#Standard disclaimers apply.
#number of lines to ignore at top (useful to preserve base href tags)
my $choptop=3;
#whether or not to comment out all tags in the top $choptop lines
#1=comment 0=don't comment
my $comment=1;
#location of the move binary
my $mv='/bin/mv';
######POLICE#LINE##DO#NOT#CROSS############POLICE#LINE##DO#NOT#CROSS######
use strict;
my $infile=$ARGV[0];
my $outfile=($ARGV[1] || $infile.'.bak');
my ($i,$j);
my ($inquote)=(0);
my (@words,@hrefs,@snippets);
my $line;
#print "$infile\n$outfile\n";
open (IN,"<$infile") || die "$0: Can't open input $infile: $!\n";
open (OUT,">$outfile") || die "$0: Can't open output $outfile: $!\n";
print OUT ;
close(IN);
close(OUT);
($infile,$outfile)=($outfile,$infile);
open (IN,"<$infile") || die "$0: Can't open input $infile: $!\n";
open (OUT,">$outfile") || die "$0: Can't open output $outfile: $!\n";
while() {
if($.<=$choptop) {
if ($comment) {$_=~s/>/ -->/; $_=~s/";
close(OUT);
#EOF