You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

299 lines
6.1 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

#!/usr/local/bin/perl
##
## changehost.pl (C) 1995 Andrew Scherpbier
##
## This program will change hostnames of URLs in the document database and index.
##
## usage:
## changehost.pl database_base from to
##
## example:
## changehost.pl /opt/www/htdig/sdsu www.sdsu.edu www.northpole.net
##
## Two new databases will be created with a base of '/tmp/new'.
## These databases can then be used by htsearch.
##
use GDBM_File;
##
## Command line: base name of the databases, the hostname to replace and
## the hostname to put in its place.  Extra arguments are ignored.
##
my ($base, $from, $to) = @ARGV;
die "usage: $0 database_base from to\n"
    unless defined($to) && length($from) && length($to);

##
## The old hostname is matched literally (\Q...\E), so the dots in
## "www.sdsu.edu" only match dots.  As before, the match is case
## insensitive and only the first occurrence in a string is rewritten.
##
my $old_url = qr/http:\/\/\Q$from\E/i;
my $new_url = "http://$to";

##
## Convert the document database first.
## Every key that contains the old URL prefix is rewritten, and so is the
## URL field stored inside its record; all other entries are copied as is.
##
{
    my $newfile = "/tmp/new.docdb";
    my $dbfile = "$base.docdb";
    my (%newdb, %docdb);

    # Open the source first so that a bad base name does not leave an
    # empty output database behind.
    tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER, 0) || die "$dbfile: $!";
    tie(%newdb, 'GDBM_File', $newfile, GDBM_NEWDB, 0644) || die "$newfile: '$!'";

    while (my ($key, $value) = each %docdb)
    {
        if ($key =~ $old_url)
        {
            my %record = parse_ref_record($value);
            $key =~ s/$old_url/$new_url/;
            print "$key\n";

            # Only touch the URL field when the record actually has one,
            # so that no empty URL field is added to the rebuilt record.
            if (exists $record{"URL"})
            {
                $record{"URL"} =~ s/$old_url/$new_url/;
            }
            $value = create_ref_record(%record);
        }
        $newdb{$key} = $value;
    }

    untie %newdb;
    untie %docdb;
}

##
## Now create the document index
## Here the URL is in the value, so the value is rewritten and the key is
## copied unchanged.
##
{
    my $newfile = "/tmp/new.docs.index";
    my $dbfile = "$base.docs.index";
    my (%newdb, %docdb);

    tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER, 0) || die "$dbfile: $!";
    tie(%newdb, 'GDBM_File', $newfile, GDBM_NEWDB, 0644) || die "$newfile: '$!'";

    while (my ($key, $value) = each %docdb)
    {
        $value =~ s/$old_url/$new_url/;
        $newdb{$key} = $value;
    }

    untie %newdb;
    untie %docdb;
}
######################################################################
##
## Layout of a serialized document record, shared by create_ref_record
## and parse_ref_record.
##
## A record is a sequence of fields.  Each field starts with a one-byte
## tag (pack "C") followed by a payload that depends on the field type:
##   int    - one native int (pack "i")
##   string - a native int byte count, then that many bytes
##   list   - a native int item count, then every item as a string payload
##
## In the hash form of a record a list field is one string holding its
## items joined by REF_LIST_SEP.  The separator is written as an explicit
## escape so that it survives editors and viewers that hide control
## characters.
## NOTE(review): confirm that \001 is the separator this script is meant
## to use; no visible separator character was present in the literals this
## constant replaces.
##
use constant REF_LIST_SEP => "\001";
use constant REF_INT_SIZE => length(pack("i", 0));

##
## Returns the field table as a list of [tag, hash key, type] triples in
## tag order.  It is a sub rather than a file-level variable because the
## main script runs before statements placed this far down the file would
## have been executed.
##
sub _ref_record_fields
{
    return (
        [  0, "ID",           "int"    ],
        [  1, "TIME",         "int"    ],
        [  2, "ACCESSED",     "int"    ],
        [  3, "STATE",        "int"    ],
        [  4, "SIZE",         "int"    ],
        [  5, "LINKS",        "int"    ],
        [  6, "IMAGESIZE",    "int"    ],
        [  7, "HOPCOUNT",     "int"    ],
        [  8, "URL",          "string" ],
        [  9, "HEAD",         "string" ],
        [ 10, "TITLE",        "string" ],
        [ 11, "DESCRIPTIONS", "list"   ],
        [ 12, "ANCHORS",      "list"   ],
        [ 13, "EMAIL",        "string" ],
        [ 14, "NOTIFICATION", "string" ],
        [ 15, "SUBJECT",      "string" ],
    );
}

##
## create_ref_record(%rec)
##
## Serializes a record hash (as returned by parse_ref_record) into its
## packed string form.  Only keys that exist in %rec are written, in tag
## order.  Returns the packed string, which is empty when %rec has no
## known keys.
##
sub create_ref_record
{
    my (%rec) = @_;
    my $s = "";
    my $sep = REF_LIST_SEP;

    foreach my $field (_ref_record_fields())
    {
        my ($tag, $name, $type) = @$field;
        next unless exists $rec{$name};

        my $data = $rec{$name};
        $data = ($type eq "int" ? 0 : "") unless defined $data;

        if ($type eq "int")
        {
            $s .= pack("Ci", $tag, $data);
        }
        elsif ($type eq "string")
        {
            $s .= pack("Ci", $tag, length($data)) . $data;
        }
        else
        {
            # Limit -1 keeps trailing empty items.  An empty string still
            # yields no items, so a list of one empty item cannot be
            # told apart from an empty list.
            my @items = split(/\Q$sep\E/, $data, -1);

            # The count is the number of items that follow; the parser
            # reads exactly that many.
            $s .= pack("Ci", $tag, scalar(@items));
            foreach my $item (@items)
            {
                $s .= pack("i", length($item)) . $item;
            }
        }
    }
    return $s;
}

##
## parse_ref_record($value)
##
## Parses a packed record string into a hash keyed by field name (see
## _ref_record_fields for the names).  String payloads are returned byte
## for byte, including trailing blanks.  Parsing stops with a warning at
## the first unknown tag or truncated field, because the length of what
## follows can no longer be determined; fields read up to that point are
## still returned.
##
sub parse_ref_record
{
    my ($value) = @_;
    $value = "" unless defined $value;

    my %rec;
    my %field_for;
    $field_for{ $_->[0] } = $_ foreach _ref_record_fields();

    my $pos = 0;
    my $end = length($value);

    # Each reader returns undef when the record ends before the payload.
    my $read_int = sub {
        return if $pos + REF_INT_SIZE > $end;
        my $number = unpack("i", substr($value, $pos, REF_INT_SIZE));
        $pos += REF_INT_SIZE;
        return $number;
    };
    my $read_string = sub {
        my $length = $read_int->();
        return if !defined($length) || $length < 0 || $pos + $length > $end;
        my $string = substr($value, $pos, $length);
        $pos += $length;
        return $string;
    };
    my $read_list = sub {
        my $count = $read_int->();
        return unless defined $count;
        my @items;
        foreach (1 .. $count)
        {
            my $item = $read_string->();
            return unless defined $item;
            push(@items, $item);
        }
        return join(REF_LIST_SEP, @items);
    };

    while ($pos < $end)
    {
        my $tag = unpack("C", substr($value, $pos, 1));
        $pos++;

        my $field = $field_for{$tag};
        if (!$field)
        {
            warn "parse_ref_record: unknown field tag $tag; rest of record ignored\n";
            last;
        }

        my ($name, $type) = @$field[1, 2];
        my $data = $type eq "int"    ? $read_int->()
                 : $type eq "string" ? $read_string->()
                 :                     $read_list->();
        if (!defined $data)
        {
            warn "parse_ref_record: truncated $name field; rest of record ignored\n";
            last;
        }
        $rec{$name} = $data;
    }
    return %rec;
}