# Copyright 2002-2008 Josh Clark and Global Moxie, LLC. This code cannot be
# redistributed without permission from globalmoxie.com.  For more
# information, consult your Big Medium license.
#
# $Id: Result.pm 3043 2008-03-31 14:00:38Z josh $

package BigMed::Search::Result;
use strict;
use warnings;
use utf8;
use Carp;
$Carp::Verbose = 1;
use BigMed::Site;
use BigMed::Content::Page;
use BigMed::Filter;
use BigMed::Error;

# Fallback page size: new() uses this for want_results whenever the caller's
# `results` parameter is missing or evaluates to zero/false.
my $NUM_RESULTS = 10;    #default number of search results to return

# Constructor. Builds a result object either from the complete list of
# matching page ids (`pages`) or from ready-made summary hashes (`summary`).
#
# Parameters (name => value pairs):
#   site          - BigMed::Site object or numeric site id (required)
#   pages         - array ref of all matching page ids
#   summary       - array ref of custom summary hash refs
#   start         - one-based start position (default 1)
#   results       - maximum number of results per page (default $NUM_RESULTS)
#   total_results - overall match count, passed along with `summary`
#
# Croaks if the site parameter is missing/invalid or if neither `pages` nor
# `summary` is supplied. Returns the new object, or nothing when the
# summaries could not be assembled.
sub new {
    my ( $class, %param ) = @_;

    my $site = $param{site};
    my $sid;
    if ( !ref $site ) {
        croak 'site object or id must be specified in site parameter'
          if !$site || $site =~ /\D/ms;
        $sid = $site;
        undef $site;
    }
    else {
        croak 'reference is not a site object in site parameter'
          if ref $site ne 'BigMed::Site';
        $sid = $site->id;
    }
    croak 'must specify pages or summary parameter in result constructor'
      if !$param{pages} && !$param{summary};

    # Normalize the paging values to whole numbers >= 1. A negative start
    # would otherwise turn into a negative array subscript when the page
    # slice is taken (which wraps around to the end of the list), and a
    # negative page size would silently produce an empty result page.
    my ( $start, $results );
    {
        no warnings 'numeric';    # junk strings simply fall back to defaults
        $start   = int( ( $param{start}   || 0 ) + 0 );
        $results = int( ( $param{results} || 0 ) + 0 );
    }

    # written as !(x >= 1) so that NaN also falls back to the default
    $start   = 1            if !( $start >= 1 );
    $results = $NUM_RESULTS if !( $results >= 1 );

    my $self = bless {
        site_id      => $sid,
        site_obj     => $site,
        start        => $start,
        want_results => $results
    }, $class;

    # `pages` wins when both pages and summary are supplied
    my $rsummaries =
        $param{pages}
      ? $self->_summarize_pages( $param{pages} )
      : $self->_format_custom( $param{summary}, $param{total_results} );
    return if !$rsummaries;
    $self->{summary} = $rsummaries;

    return $self;
}

# Accessor: the total number of matches recorded for the whole search.
sub total_results {
    my ($self) = @_;
    return $self->{total_results};
}

# Accessor: the number of results held in this object's result set.
sub num_results {
    my ($self) = @_;
    return $self->{num_results};
}

# Accessor: the one-based start position stored for this result set
# (0 when a page search matched nothing).
sub start_position {
    my ($self) = @_;
    return $self->{start};
}

# Returns the stored summaries as a list of hash references (in scalar
# context, the number of summaries).
sub result_set {
    my ($self) = @_;
    return @{ $self->{summary} };
}

# Accessor: the id of the site these results belong to.
sub site_id {
    my ($self) = @_;
    return $self->{site_id};
}

# Returns the BigMed::Site object for these results, fetching it by id on
# first use and caching it in the object afterwards.
#
# Returns nothing when the site cannot be loaded: either the fetch itself
# returned undef, or the fetch returned a defined-but-false value, in which
# case a "no such site" error is registered first.
sub site_obj {
    my $self = shift;
    return $self->{site_obj} if $self->{site_obj};

    my $site = BigMed::Site->fetch( $self->{site_id} );
    if ( !$site ) {

        # undef from fetch: presumably an i/o error that fetch has already
        # reported (same convention the page fetch comments describe)
        return if !defined $site;

        # Defined but false: the site does not exist. This must be a class
        # method call; the previous `BigMed::Error( ... )` form is parsed as
        # a call to a function named Error in package BigMed.
        # NOTE(review): assumes BigMed::Error->set_error records the message
        # and returns a false value -- confirm against BigMed::Error.
        return BigMed::Error->set_error(
            head => 'BM_No such site',
            text => 'BM_TEXT_No such site',
        );
    }
    $self->{site_obj} = $site;
    return $site;
}

# Builds the current page of result summaries from the complete, sorted
# list of matching page ids.
#
#   $rpages - array ref of every matching page id
#
# Records total_results and num_results on the object and may adjust the
# stored start position. Returns an array ref of summary hashes (title,
# summary, url, mod_time), or nothing if the site or a page could not be
# loaded.
sub _summarize_pages {
    my ( $self, $rpages ) = @_;

    my $total = scalar @{$rpages};
    $self->{total_results} = $total;
    if ( $total == 0 ) {
        $self->{start}       = 0;
        $self->{num_results} = 0;
        return [];
    }

    # a start beyond the last match is pulled back to the final page
    my $per_page = $self->{want_results};
    if ( $self->{start} > $total ) {
        my $last_page_start = $total - $per_page + 1;
        $self->{start} = $last_page_start < 1 ? 1 : $last_page_start;
    }

    # translate the one-based window into zero-based slice bounds
    my $first = $self->{start} - 1;
    my $last  = $first + $per_page - 1;
    if ( $last >= $total ) {
        $last = $total - 1;
    }
    my @page_ids = @{$rpages}[ $first .. $last ];

    # the site object is required to work out active page urls
    my $site = $self->site_obj or return;
    my @active_ids = $site->all_active_descendants_ids();
    my $rkids      = \@active_ids;

    my @summaries;
  PAGE:
    for my $page_id (@page_ids) {
        my $page =
          BigMed::Content::Page->fetch(
            { site => $self->{site_id}, id => $page_id } );
        return    if !defined $page;    # i/o error
        next PAGE if !$page;            # no such page

        my $url =
          $page->active_page_url( $site, undef, { rkids => $rkids } );
        next PAGE if !$url;

        my $title = $page->title || 'Untitled';

        # first non-empty source wins: description, then content, then
        # the raw meta description
        my $base_text;
        for my $field (qw(description content)) {
            $base_text =
              BigMed::Filter->inline_single_graf(
                BigMed::Filter->filter( $page->$field ) );
            last if $base_text;
        }
        $base_text ||= $page->meta_description;

        my $summary = _summary_from_html($base_text) || $title;

        push @summaries,
          { title    => $title,
            summary  => $summary,
            url      => $url,
            mod_time => $page->mod_time,
          };
    }

    $self->{num_results} = scalar @summaries;
    return \@summaries;
}

# Builds the result set from caller-supplied summaries.
#
#   $rsum  - array ref of hash refs; only the title, summary, url and
#            mod_time entries are copied
#   $total - optional overall match count; when undefined, the number of
#            supplied summaries is used instead
#
# Records num_results and total_results on the object and returns an array
# ref of the copied summary hashes.
sub _format_custom {
    my ( $self, $rsum, $total ) = @_;

    my @fields = qw(title summary url mod_time);
    my @summaries;
    for my $entry ( @{$rsum} ) {
        push @summaries, +{ map { $_ => $entry->{$_} } @fields };
    }

    my $found = scalar @summaries;
    $self->{num_results} = $found;
    if ( defined $total ) {
        $self->{total_results} = $total;
    }
    else {
        $self->{total_results} = $found;
    }
    return \@summaries;
}

# Reduces an HTML fragment to a short plain-text summary.
#
#   $text - HTML string (may be undef or empty)
#
# Strips comments, drops script/style/select/head/textarea elements along
# with their contents, removes all remaining tags, trims and collapses runs
# of whitespace, and truncates to 250 characters followed by '...'.
# Returns the empty string for undef input; other false values ('' or '0')
# are returned unchanged.
sub _summary_from_html {
    my $text = shift;
    if ( !$text ) {
        $text = q{} if !defined $text;
        return $text;
    }

    $text =~ s/<!\-\-.*?\-\->//msg;    #kill comments

    # Mild attempt to remove the contents of bad containing tags.
    #  - \b after the tag name keeps look-alike tags such as <header> from
    #    being treated as <head> and losing their text.
    #  - .*? (rather than .+?) lets an empty container like
    #    <script></script> match on its own instead of running on to the
    #    next closing tag and swallowing the text in between.
    $text =~ s{
        <\s*
        (script|style|select|head|textarea)\b
        [^>]*>
        .*?
        </\s*\1\b[^>]*>
    }{}xmsgi;
    $text =~ s/<[^>]+>//msg;           #remaining tags

    $text =~ s/\A\s+//ms;
    $text =~ s/\s+\z//ms;
    $text =~ s/\s{2,}/ /msg;
    if ( length($text) > 250 ) {
        $text = substr( $text, 0, 250 ) . '...';
    }
    return $text;
}

1;
__END__

=head1 NAME

BigMed::Search::Result - Object for communicating search results

=head1 SUMMARY

    use BigMed::Search::Result;
    
    $result = BigMed::Search::Result->new(
        site => $site_object_or_id,
        pages => \@sorted_page_ids, #complete result set
        start => 21, #where to start in the result set (one-based)
        results => 10, #number of results to return
    );
    
    my $total = $result->total_results; #total number of matches
    my $this_page = $result->num_results; #number in this result set
    my $start = $result->start_position; #one-based start for this set

    foreach my $rhash ( $result->result_set ) {
        print '<p><a href="', $rhash->{url}, '">';
        print $rhash->{title}, '</a><br />';
        print $rhash->{summary}, '<br />';
        print $rhash->{mod_time}, '</p>';
    }
    
=head1 DESCRIPTION

BigMed::Search's C<search> method returns a BigMed::Search::Result
object to communicate the number of overall matches and the current
page of search results. BigMed::Search::Result objects can be
created either by providing the complete array of page ID search
results, or an array of hash references with custom-generated
results for BigMed::Search subclasses using external search engines.

=head1 METHODS

=head2 C<new>

    #create an object from a complete set of results (array of page ids)
    $result = BigMed::Search::Result->new(
        site => $site_object_or_id,
        pages => \@sorted_page_ids,
    );
    
    #create an object from an array of custom summaries
    my @summaries = (
        {
            title => 'Global Moxie',
            url   => 'http://globalmoxie.com/',
            summary => 'The hypertext laboratory of Josh Clark',
            mod_time => '2007-08-13 16:32:44' #GMT time zone
        },
        {
            title => 'Paris Muse',
            url   => 'http://www.parismuse.com/',
            summary => 'Private museum tours for the creative traveler',
            mod_time => '2007-04-21 12:11:59' #GMT time zone
        },
    );
    $result = BigMed::Search::Result->new(
        site => $site_object_or_id,
        summary => \@summaries,
        total_results => 379,
        start => 378,
    );

Returns a BigMed::Search::Result object. Parameters are provided in the
argument hash. The method requires two parameters: C<site> and either
the C<pages> or C<summary> parameter. The accepted parameters are:

=over 4

=item * C<site>

Required. The site object or id for which results are being returned. If
you already have a site object in hand, providing the actual object gives
a modest performance boost when processing via the C<pages> parameter, since
it saves re-loading the site and its sections.

=item * C<pages>

Reference to the sorted array of all matching page IDs. The C<start>
and C<results> parameters determine which pages will be returned by
the object's C<result_set> method.

=item * C<summary>

If you're creating your own BigMed::Search subclass, you may need to
populate the result set with your own external data. If so, provide
a reference to a sorted array of hash references, each containing
a summary of a search result. Each hash reference should contain the
following key/value pairs (the same as you can retrieve via the C<result_set>
method):

=over 4

=item * title

=item * url

=item * summary

=item * mod_time

GMT time zone in this format: YYYY-MM-DD HH:MM:SS

=back

Unlike when you use the C<pages> parameter, all items in C<summary>
are returned via C<result_set>; the C<start> and C<results> parameters
have no effect on which results are returned.

=item * C<results>

When using the C<pages> parameter, this controls the maximum number of
results to return in the C<result_set> method. The default is 10.

This parameter has no effect when you use the C<summary> parameter
(all results in the C<summary> parameter are always returned by
C<result_set>).

=item * C<start>

When using the C<pages> parameter, this sets the one-based position in the
overall set of results at which the C<result_set> method starts returning
values. (This value may be adjusted if, for example, the start is set
higher than the number of available results.) The default is 1.

When using the C<summary> parameter, C<start> has no effect on the 
results returned by C<result_set>. However, it is used to determine
the value returned by C<start_position>.

=item * C<total_results>

When using the C<summary> parameter, this value can be set to determine
the value returned by the C<total_results> method (otherwise,
C<total_results> returns the same result as C<num_results>).

This parameter is ignored when using the C<pages> parameter.

=back

=head2 C<result_set>

    my @results = $result->result_set;
    foreach my $rhash ( @results ) {
        print '<p><a href="', $rhash->{url}, '">';
        print $rhash->{title}, '</a><br />';
        print $rhash->{summary}, '<br />';
        print $rhash->{mod_time}, '</p>';
    }

Returns an array of hash references with the requested "slice" of all
search results. The size and position of this slice are determined by
the C<start> and C<results> parameters passed to the C<new> method.

The hash references contain the following key/value pairs:

=over 4

=item * title

=item * url

=item * summary

=item * mod_time (GMT time zone: YYYY-MM-DD HH:MM:SS)

=back

=head2 C<num_results>

    my $num = $result->num_results;

The number of results returned by C<result_set>.

=head2 C<total_results>

    my $total = $result->total_results;

The total number of results in the overall search (of which C<result_set>
returns just a slice).

=head2 C<start_position>

    my $start = $result->start_position;

The start position of the result set relative to the complete set of
results.

=head2 C<site_id>

    my $site_id = $result->site_id;

The BigMed::Site id of the site associated with these search results.

=head2 C<site_obj>

    my $site = $result->site_obj;

The BigMed::Site object for the site associated with these search results.

=head1 AUTHOR & COPYRIGHTS

This module and all Big Medium modules are copyright Josh Clark
and Global Moxie. All rights reserved.

Use of this module and the Big Medium content management system
are governed by Global Moxie's software licenses and may not be
used outside of the terms and conditions outlined there.

For more information, visit the Global Moxie website at
L<http://globalmoxie.com/>.

Big Medium and Global Moxie are service marks of Global Moxie
and Josh Clark. All rights reserved.

=cut

