###############################################################################
#
# Ferret search engine
#
# Written by Brian C. White <bcwhite@verisim.com>
# Copyright (c) 1996 by Verisim, Inc.  All rights reserved.
#
###############################################################################

package Ferret;

require 5.002;
require Exporter;
require DynaLoader;
require AutoLoader;

use Carp;

@ISA = qw(Exporter DynaLoader AutoLoader);
$VERSION   = '1.1.3';
@EXPORT	   = qw();
@EXPORT_OK = qw();

$] == 5.00307 or die "Error: This instance of Ferret has been compiled against Perl v5.00307\n		  The currently installed version of Perl is v$]\n";

bootstrap Ferret $VERSION;



sub AUTOLOAD {
	my $constname = $AUTOLOAD;
	$constname =~ s/.*:://;
	my $val = constant($constname, @_ ? $_[0] : 0);
	if ($! != 0) {
#		if ($! =~ /Invalid/) {
			$AutoLoader::AUTOLOAD = $AUTOLOAD;
			goto &AutoLoader::AUTOLOAD;
#		} else {
#		  Carp::croak("Error: Nonexistent Ferret constant '$constname' used");
#		}
	}
	eval "sub $AUTOLOAD { $val }";
	goto &$AUTOLOAD;
}



use strict;



###############################################################################
#
# Package-wide variable definitions
#
my %indexmode;
my %newwords;
my %stoppers;
my %options;
my %picsizes;

my $nettimeout	= 0;	# Seconds before aborting net communications (0=no abt)



###############################################################################
#
# Default list of stoppers (two letters or more only)
#
my @defstoppers =
	qw(and the with for who what where when why how her him them they
	   she its our ours that were will wont would wouldnt could cant
	   couldnt should shouldnt this that are from not can have all but
	   must mustnt these each also some has any use there which let near);



###############################################################################
#
# Set of query aliases
#
my %queryaliases =
(
	AND		=> "&",
	OR		=> "|",
	NOT		=> "!",
	NEAR	=> "10",
);



###############################################################################
#
# Required string adjustments for documents & queries
#
sub AliasSubstitute {
	my $usage = 'Usage: Ferret::AliasSubstitute(\$string, \%aliases)';
	@_ == 2 or croak $usage;
	my($query,$aliases) = @_;

	croak $usage if (ref $query   ne "SCALAR");
	croak $usage if (ref $aliases ne "HASH");

#	print "alias in:  $$query\n";

	my($key,$val);
	while (($key,$val) = each %$aliases) {
		$$query =~ s/\b\Q$key\E\b/$val/gs;
	}

#	print "alias out: $$query\n";
}



sub PrepQuery {
	my $usage = 'Usage: Ferret::PrepQuery(\$query [, \$validwords])';
	@_ >= 1 && @_ <= 2 or croak $usage;
	my($query,$wlist) = @_;

	croak $usage if (ref $query ne "SCALAR");
	croak $usage if ($wlist && ref $wlist ne "SCALAR");

	# Remove any dangerous characters
	$$query =~ y/\$/ /s;
#	$$query =~ s/\$/ /g;

	# Remove any unknown characters
	$$query =~ y/A-Za-z0-9\' \t\n!|&()\"\[\]\?\*\^/ /c;
#	$$query =~ s/[^\w\s!|&()\"]+/ /gs;

	# Separate control characters out into words
	$$query =~ s/([!|&()])/ $1 /g;

	# Make sure quoted strings get evaluated properly
	$$query =~ s/"(.*?)"/ ( " $1 " ) /g;

	# Remove global word-matches
	1 while $$query =~ s/(^|\s)(\?|\*|\[\^.*?\])+($|\s)/ /gs;

	# Handle globbing patterns
	foreach (_FindGlobs($$query)) {
		my $expr = "( " . join(' | ',_ExpandGlob($_,$wlist)) . " )";
		$$query =~ s/\Q$_\E/$expr/;
	}

	# Merge all whitespace together
	$$query =~ s/\s+/ /gs;

	# Remove header and footer whitespace
	$$query =~ s/^\s+|\s+$//;

	# Mark entire query as subquery
	$$query = "( $$query )";
}



###############################################################################
#
# Opening & Closing Indexes
#
sub ReadOptions {
	my $usage = 'Usage: $ferret->ReadOptions()';
	@_ == 1 or croak $usage;
	my($this) = @_;

	croak $usage if (ref $this ne "Ferret");

	my $opts = $this->Options();
	$Ferret::options{$this}	 = $opts;

	if ($opts & &OPT_NOSTOPPERS) {
		$Ferret::stoppers{$this} = "";
	} else {
		$Ferret::stoppers{$this} = $this->DBGetWords("stoppers");
		$Ferret::stoppers{$this} = join("\n",@defstoppers) unless $Ferret::stoppers{$this};
	}
}



sub LocalOpen {
	my $usage = 'Usage: $ferret->LocalOpen(PATHNAME [, WRITEFLAG])';
	@_ >= 2 && @_ <= 3 or croak $usage;
	my($this,$file,$mode) = @_;

	croak $usage if (ref $this ne "Ferret");

	my $open = $this->OpenIndex($file,$mode);

	croak "Error: Could not open index file '$file' -- $!\n       stopped" if $open == -1;
	die	  "Error: Old index incompatible with newer evaluation version -- Please regenerate\n" if $open == -2;
	croak "Error: Evaluation version of \"Ferret\" has expired; stopped" if $open == -3;

	$Ferret::indexmode{$this}= $mode;
	$Ferret::newwords{$this} = "";
	$Ferret::stoppers{$this} = "";

	$this->ReadOptions();
}



sub Open {
	my $usage = 'Usage: $ferret->Open(PATHNAME)';
	@_ == 2 or croak $usage;
	my($this,$file) = @_;

	croak $usage if (ref $this ne "Ferret");

	$this->LocalOpen($file);
}



sub FlushLocal {
	my $usage = 'Usage: $ferret->FlushLocal()';
	@_ == 1 or croak $usage;
	my($this) = @_;

	croak $usage if (ref $this ne "Ferret");

	if ($Ferret::indexmode{$this} && $Ferret::newwords{$this}) {
		my $words = $this->DBGetWords("all");
		my @words = split(/\n/, $words);
		push @words,split(/\n/, $Ferret::newwords{$this});
		$words = join(' ',sort(@words));
		$this->DBPutWords("all",$words);
		$Ferret::newwords{$this} = "";
	}
}



sub Close {
	my $usage = 'Usage: $ferret->Close()';
	@_ == 1 or croak $usage;
	my($this) = @_;

	croak $usage if (ref $this ne "Ferret");

	$this->FlushLocal();
	$this->CloseIndex();
}



###############################################################################
#
# Queries
#
sub QueryRun {
	my $usage = 'Usage: $result = $ferret->QueryRun(QUERYSTRING)';
	@_ == 2 or croak $usage;
	my($this,$query) = @_;

	croak $usage if (ref $this ne "Ferret");

	my $wordlist = $this->DBGetWords("all") if $query =~ m/[\[\]\?\*]/;

	# Run standard aliases
	AliasSubstitute(\$query,\%queryaliases);

	StripWords(\$query, \$Ferret::stoppers{$this});
	PrepQuery(\$query,\$wordlist);

#	print "Prepped query=$query\n";

	while ($query =~ m/\(\s*([^()]*?)\s*\)/s) {
		my(@match,$lidx,$ridx,$index);
		my($op,$lr,$quote,$negate,$range);
		$negate = 0;

#		print "Subquery: $1\n";
#		_Time("Parsed Subquery");

		foreach (split(/ /,$1)) {
			if ($_ eq '"')		{ $quote = !$quote; next; }
			if ($_ eq '!')		{ $negate= 1;		next; }
			if ($_ =~ m/^\d+$/) { $range = $_;		next; }

			if ($_ =~ m/^[&|]$/) {
				$op = $_ if ($lr);
				next;
			}

			if ($_ =~ m/^\$([-\d]+)$/) {
				$ridx = $1;
			} else {
#				print "Query Word '$_'...";
				$ridx = $this->QueryWord($_);
#				print "(#$ridx)\n";
			}

			if ($negate) {
#				print "Negate Result #$ridx... ";
				$index = $this->QueryNegate($ridx);
#				print "(#$index)\n";
				$this->FreeQueryResult($ridx);
				$ridx = $index;
				$negate = 0;
			}
#			print "Match for '$_': @match\n";

			$range = 1 if $quote;

			if ($lr) {
				if ($op eq '|') {
					$index = $this->QueryOr($lidx,$ridx);
				} elsif ($op eq '' || $op eq '&') {
					$index = $this->QueryAnd($lidx,$ridx,$range);
				} else {
					$@ = "Unknown operator '$op'";
					return;
				}
				$this->FreeQueryResult($lidx);
				$this->FreeQueryResult($ridx);
				$lidx = $index;
			} else {
				$lidx = $ridx;
				$lr++;
			}

			$op = '';
		}

		$query =~ s/\(\s*[^()]*?\s*\)/\$$lidx/;
	}


	my($idx) = ($query =~ m/\$([-\d]+)/);
	return($idx);
}



sub QueryMatches {
	my $usage = 'Usage: @matches = $ferret->QueryMatches($result)';
	@_ == 2 or croak $usage;
	my($this,$idx) = @_;

	croak $usage if (ref $this ne "Ferret");

	my $docs = $this->QueryResults($idx);
	my @docs = split(/\n/,$docs);

	if (@docs) {
		@docs = sort { return $b <=> $a if ($b != $a); $a cmp $b } @docs;
		$@ = "";
	} else {
		$@ = "No matches found";
	}

	return(@docs);
}



sub Query {
	my $usage = 'Usage: @matches = $ferret->Query(QUERYSTRING)';
	@_ == 2 or croak $usage;
	my($this,$query) = @_;

	croak $usage if (ref $this ne "Ferret");

	my $result	= $this->QueryRun($query);
	my @matches = $this->QueryMatches($result);
	$this->FreeQueryResult($result);

	return @matches;
}



1;
__END__
