# Copyright 2002-2008 Josh Clark and Global Moxie, LLC. This code cannot be
# redistributed without permission from globalmoxie.com.  For more
# information, consult your Big Medium license.
#
# $Id: Stopwords.pm 3043 2008-03-31 14:00:38Z josh $

package BigMed::Search::Stopwords;
use strict;
use warnings;
use utf8;

# Locales (upper-cased two-letter codes) that have a dedicated stopword
# module under BigMed::Search::Stopwords::. %CAN_LOCALE is the same list as
# a lookup table and is only ever tested for truth.
my @LOCALES = qw(CS DA DE EN ES FI FR IT NL NO PT SV);
my %CAN_LOCALE = map { $_ => 1 } @LOCALES;

# Universal stop words, applied to every locale. The values are 1 (not the
# word itself) so that the fallback hash copied from %ALL_STOP honors the
# documented contract ("values are 1") and matches the entries that
# hashref() adds to locale-specific hashes.
my @ALL_LOCALE = qw(www);
my %ALL_STOP = map { $_ => 1 } @ALL_LOCALE;

# hashref( $locale )
#
# Class method. Returns a hash reference whose keys are the stopwords for
# the requested locale.
#
#   $locale - language code such as 'en' or 'en-US'; only the first two
#             characters are used, and they are compared case-insensitively.
#
# For a supported locale the matching BigMed::Search::Stopwords::<CODE>
# module is loaded on demand and the universal stopwords are added to the
# hash it returns. For an unsupported, empty or undefined locale a fresh
# copy of the universal stopword hash is returned instead.
#
# Dies (via require) if the module for a supported locale cannot be loaded.
sub hashref {
    my ($class, $locale) = @_;

    # An undefined locale is treated as "unsupported" rather than being fed
    # to substr(), which would emit an uninitialized-value warning.
    my $code = defined $locale ? uc substr($locale, 0, 2) : q{};
    return {%ALL_STOP} if !$CAN_LOCALE{$code};

    # $code was just checked against the list of known locales, so the
    # interpolated path can only name one of the bundled stopword modules.
    require "BigMed/Search/Stopwords/$code.pm";
    my $locale_class = "BigMed::Search::Stopwords::$code";
    my $rhash = $locale_class->hashref;

    # NOTE(review): this writes into whatever hash the locale class hands
    # back; confirm whether that hash is shared between calls. The writes
    # always store the same key/value pairs, so repeating them is harmless.
    foreach my $stop (@ALL_LOCALE) {
        $rhash->{$stop} = 1;
    }
    return $rhash;
}

1;
__END__

=head1 NAME

BigMed::Search::Stopwords - Returns lists of locale-specific search stopwords

=head1 DESCRIPTION

A "stopword" is a word that is excluded from the search engine index because
it is so common as to be meaningless for searches. Omitting these stopwords
saves space and promotes accuracy.

This module provides hashes of stopwords. Comprehensive lists are provided
for the following languages:

=over 4

=item * Czech (cs)

=item * Danish (da)

=item * German (de)

=item * English (en)

=item * Spanish (es)

=item * Finnish (fi)

=item * French (fr)

=item * Italian (it)

=item * Dutch (nl)

=item * Norwegian (no)

=item * Portuguese (pt)

=item * Swedish (sv)

=back

All other languages receive a more modest list of words that are omitted
from all languages (e.g., "www").

=head1 USAGE

    my $rhash = BigMed::Search::Stopwords->hashref( 'en' );

=head1 METHOD

=head2 C<< BigMed::Search::Stopwords->hashref( $iso_code ) >>

Returns a hash reference where keys are the language's stop words and
values are 1.

=head1 AUTHOR & COPYRIGHTS

This module and all Big Medium modules are copyright Josh Clark
and Global Moxie. All rights reserved.

Use of this module and the Big Medium content
management system are governed by Global Moxie's software licenses
and may not be used outside of the terms and conditions outlined
there.

For more information, visit the Global Moxie website at
L<http://globalmoxie.com/>.

Big Medium and Global Moxie are service marks of Global Moxie
and Josh Clark. All rights reserved.

=cut
