# Copyright 2002-2008 Josh Clark and Global Moxie, LLC. This code cannot be
# redistributed without permission from globalmoxie.com.  For more
# information, consult your Big Medium license.
#
# $Id: Janitor.pm 3239 2008-08-22 14:59:28Z josh $

package BigMed::Janitor;
use strict;
use warnings;
use utf8;
use Carp;
$Carp::Verbose = 1;
use BigMed;
use BigMed::Trigger;
use BigMed::Site;
use BigMed::Builder;
use BigMed::Plugin;
use BigMed::Log;
use BigMed::JanitorNote;
use BigMed::Comment;
use BigMed::PageAlert;
use BigMed::Search;
use BigMed::Search::Scheduler;
use BigMed::DiskUtil qw(bm_untaint_filepath bm_file_path bm_delete_dir);
use BigMed::Backup;

BigMed::Janitor->add_log(
    limit_to_class => 1,
    outputs        => {
        name     => 'janitor_log',
        filename => 'janitor_log.txt',
    },
);

BigMed::Janitor->add_trigger( 'before_all',        \&_rotate_logs );
BigMed::Janitor->add_trigger( 'before_site_build', \&_find_pub_updates );
BigMed::Janitor->add_trigger( 'before_site_build', \&_find_deferred_overflow );
BigMed::Janitor->add_trigger( 'before_site_build', \&_update_search_index );
BigMed::Janitor->add_trigger( 'after_site_build',  \&_trash_old_spam );
BigMed::Janitor->add_trigger( 'after_all',         \&_cleanup_extracted );
BigMed::Janitor->add_trigger( 'after_all',         \&_backup_files );

sub new {
    my $class = shift;
    my $self = bless {}, $class;
    return $self;
}

sub do_maintenance {
    my $self = shift;
    $self->log( info => 'Janitor: Starting maintenance routine' );
    $self->call_trigger('before_all') or return;
    my $all_sites = BigMed::Site->select() or return;
    $self->_collect_jnotes() or return;

    $self->reset_build_info;
    my $site;
    while ( $site = $all_sites->next ) {
        $self->call_trigger( 'before_site_build', $site ) or return;

        my $rsections      = $self->sections_to_build;
        my $rpages         = $self->pages_to_build;
        my $rremove_detail = $self->detail_to_remove;

        #only need to build if we have a change to make
        my $builder;
        if ( @{$rsections} || @{$rremove_detail} ) {
            $builder = BigMed::Builder->new( site => $site ) or return;
            $self->call_trigger( 'new_builder', $builder ) or return;

            undef $rpages if !@{ $rpages };
            if ($rsections) {    #pages to build
                $builder->build(
                    sections  => $rsections,
                    pages     => $rpages,
                    no_detail => ( $rpages ? undef: 1 ),
                  )
                  or return;
            }
            if ($rremove_detail) {
                foreach my $rargs ( @{$rremove_detail} ) {
                    $builder->remove_old_files( @{$rargs} ) or return;
                }
            }

            $self->log( notice => 'Janitor: Rebuilt relevant pages for '
                  . $self->log_data_tag($site) );
        }

        $self->call_trigger( 'after_site_build', $site, $builder ) or return;

        $self->reset_build_info;
    }
    return if !defined $site;    #error

    $self->call_trigger('after_all') or return;
    $self->log( info => 'Janitor: Maintenance routine complete' );
    return 1;
}

sub reset_build_info {
    my $self = shift;
    foreach my $k (qw(_sections_to_build _pages_to_build _remove_detail)) {
        $self->{$k} = [];
    }
    return;
}

sub add_sections_to_build {
    my $self    = shift;
    my @sec_ids = @_;

    $self->{_sections_to_build} ||= [];
    push @{ $self->{_sections_to_build} }, grep { $_ } @sec_ids;
    return;
}

sub sections_to_build {
    return $_[0]->_gather_unique_array_items('_sections_to_build');
}

sub add_pages_to_build {
    my $self     = shift;
    my @page_ids = @_;

    $self->{_pages_to_build} ||= [];
    push @{ $self->{_pages_to_build} }, grep { $_ } @page_ids;
    return;
}

sub pages_to_build {
    return $_[0]->_gather_unique_array_items('_pages_to_build');
}

sub add_detail_to_remove {
    my $self = shift;
    my $slug = shift;
    defined $slug or croak 'add_detail_to_remove requires slug name';
    my $rsections = shift;
    if ( ref $rsections ne 'ARRAY' ) {
        croak 'add_detail_to_remove requires section array reference';
    }
    return if !@{$rsections};

    push @{ $self->{_remove_detail} }, [$slug, $rsections];
    return;
}

sub detail_to_remove {
    my $self = shift;
    my $rdetail_list = $self->{_remove_detail} || [];
    return \@{$rdetail_list};    #de/re-reference
}

sub set_stash {
    my $self = shift;
    croak 'usage: $janitor->set_stash("key", $value)' if @_ % 2;
    my %arg = @_;
    while ( my ( $k, $v ) = each(%arg) ) {
        $self->{_STASH}->{$k} = $v;
    }
    return $self;
}

sub stash {
    my $self = shift;
    my @keys = grep { defined $_ } @_;
    return wantarray ? ( map { $self->{_STASH}->{$_} } @keys )
      : defined $keys[0] ? $self->{_STASH}->{ $keys[0] }
      : undef;
}

sub jnotes { return $_[0]->{_JNOTES} ? %{ $_[0]->{_JNOTES} } : ();  }

#Gather any pending BigMed::JanitorNote instructions into a hash keyed
#by site ID and action (see the jnotes POD entry), then delete the notes.
sub _collect_jnotes {
    my $self = shift;
    my $jnotes = BigMed::JanitorNote->select() or return;
    
    my %jnote;
    my $note;
    while ($note = $jnotes->next) {
        my $site = $note->site or next;
        my $action = $note->action or next;
        my @note_target = $note->target;
        if ( @note_target ) {
            my @targets =
              $jnote{$site}->{$action} ? @{ $jnote{$site}->{$action} } : ();
            $jnote{$site}->{$action} = [ sort ( @targets, @note_target ) ];
        }
        else {
            $jnote{$site}->{$action} = 1;
        }
    }
    return if !defined $note;
    $jnotes->trash_all or return;
    $self->{_JNOTES} = \%jnote;
    return 1;
}

sub _gather_unique_array_items {
    my $self    = shift;
    my $key     = shift;
    my $rcached = $self->{$key} || [];
    my %winnow  = map { $_ => 1 } @{$rcached};
    return [keys %winnow];
}

sub _rotate_logs {
    my $self = shift;
    return BigMed::Log->rotate_logs();
}

sub _find_pub_updates {
    my $self = shift;
    my $site = shift;

    my $now = BigMed->bigmed_time();

    foreach my $c_class ( BigMed::Plugin->load_content_types ) {
        my $is_page = $c_class->isa('BigMed::Content::Page');
        my $item;

        #publish items to auto-publish
        my $to_publish = $c_class->select(
            {   site          => $site->id,
                auto_pub_time => { from => '1', to => $now }
            }
        );
        my $item_label = $c_class->data_label;
        my @index_ids;
        while ( $item = $to_publish->next ) {

            #update item status to published
            if ( $item->pub_status ne 'published' ) {
                $item->set_pub_status('published');
                $item->set_pub_time($now);

                #flag for building
                $self->add_sections_to_build( $item->sections );
                if ($is_page) {
                    $self->add_pages_to_build( $item->id );
                    BigMed::PageAlert->notify( 'page_status', $item, $site );
                    push @index_ids, $item->id; #schedule for indexing
                }
                $self->log( notice => 'Janitor: Auto-published '
                      . $self->log_data_tag($item) );
            }
            $item->set_auto_pub_time(undef);
            $item->save or return;
        }
        return if !defined $item;
        schedule_index($site,\@index_ids) if @index_ids;

        #unpublish items to auto-unpublish
        my $to_unpublish = $c_class->select(
            {   site            => $site->id,
                auto_unpub_time => { from => '1', to => $now }
            }
        );
        my @unindex_ids;
        while ( $item = $to_unpublish->next ) {

            #update item status to unpublished (draft, specifically)
            if ( $item->pub_status eq 'published' ) {
                $item->set_pub_status('draft');

                #mark section to build and detail page for deletion
                $self->add_sections_to_build( $item->sections );
                if ($is_page) {
                    $self->add_detail_to_remove( $item->slug, [$item->sections] );
                    BigMed::PageAlert->notify( 'page_status', $item, $site );
                    push @unindex_ids, $item->id; #schedule for de-indexing
                }
    
                $self->log( notice => 'Janitor: Auto-unpublished '
                      . $self->log_data_tag($item) );
            }
            $item->set_pub_time(undef);
            $item->set_auto_unpub_time(undef);
            $item->save or return;
        }
        return if !defined $item;
        schedule_deindex($site,\@unindex_ids) if @unindex_ids;
    }
    return 1;
}

sub _find_deferred_overflow {
    my ( $self, $site ) = @_;
    my %jnote = $self->jnotes;
    my $id    = $site->id;
    if ( $jnote{$id} && $jnote{$id}->{'build_overflow'} ) {
        $self->add_sections_to_build( @{ $jnote{$id}->{'build_overflow'} } );
    }
    return 1;
}

sub _update_search_index {
    my ( $self, $site ) = @_;

    my %jnote = $self->jnotes;
    my $id    = $site->id;
    my $radd  = $jnote{$id} && $jnote{$id}->{'add_search_index'};
    my $rdel  = $jnote{$id} && $jnote{$id}->{'remove_search_index'};
    return 1 if !$radd && !$rdel;

    require BigMed::Content::Page;
    require BigMed::Format::HTML;
    my $lang = $site->get_pref_value('html_htmlhead_lang') || 'en';
    my $search = BigMed::Search->new( locale => $lang );
    $self->call_trigger( 'before_index', $search, $site ) or return;

    my $site_tag = $self->log_data_tag($site);
    if ($radd) {
        my $select =
          BigMed::Content::Page->select( { site => $id, id => $radd } )
          or return;
        $search->index_page($select) or return;
        $self->log( 'info' => "Janitor: Indexed pages in $site_tag: "
              . join( ', ', @{$radd} ) );
    }
    if ($rdel) {
        $self->log( 'info' => "Janitor: De-indexing pages in $site_tag: "
              . join( ', ', @{$rdel} ) );
        $search->remove_page( { site => $id, pages => $rdel } ) or return;
    }

    return 1;
}

my $OLD_SPAM_AGE = '-15d';

sub _trash_old_spam {
    my ($self, $site, $builder) = @_;
    my $time = BigMed->time_obj;
    $time->shift_time($OLD_SPAM_AGE);
    my $select = BigMed::Comment->select(
        {   site      => $site->id,
            status    => 'spam',
            post_time => { to => $time->bigmed_time }
        }
    );
    return $select->trash_all;
}

sub _cleanup_extracted {
    my $self = shift;
    my $bigmed = BigMed->bigmed;
    my $moxiedata = $bigmed->env('MOXIEDATA') or return 1;
    my $extract =
      bm_untaint_filepath( bm_file_path( $moxiedata, 'worktemp', 'extract' ) )
      or return 1;
    return 1 if !-e $extract;
    
    #gather directories in extract dir that are older than one day
    my $DIR;
    opendir($DIR, $extract)
      or return BigMed->set_io_error( $DIR, 'opendir', $extract, $! );
    my @paths =
      grep { -M $_ > 1 } 
      map { bm_file_path( $extract, $_ ) }
      grep { /\A[a-fA-F0-9]/ } readdir($DIR);
    closedir($DIR);
    foreach my $path ( @paths ) {
        bm_delete_dir($path) or return;
    }
    return 1;
}

sub _backup_files {
    my $self = shift;
    my $bm = BigMed->bigmed;
    return 1 if !$bm->env('BACKUP_FREQ') || !$bm->env('BACKUP_KEEP');

    #run backup within two-hour window of start hour
    my $start_hour = $bm->env('BACKUP_HOUR') || 0;
    my $end_hour = $start_hour + 1;
    my $hour =  (localtime)[2];
    if ($end_hour > 23) {
        return 1 if $hour > ($end_hour - 24) && $hour < $start_hour;
    }
    else {
        return 1 if $hour < $start_hour;
    }
    
    #check to see if we're in our daily backup window,
    #less two hours to keep things as close to start hour as possible
    my $min_age = $bm->env('BACKUP_FREQ') - .083; #in days
    my $backup = BigMed::Backup->new();
    my $last = $backup->last_backup();
    return 1 if $last && $last < $min_age;
    
    $self->call_trigger('before_backup', $backup);
    $backup->do_backup()
      or $self->log( 'alert' => 'Could not complete data backup.' );
    $backup->prune()
      or $self->log( 'alert' => 'Could not prune older backups.' );

    return 1;
}

1;

__END__

=head1 BigMed::Janitor

Handles regular, automated maintenance routines for Big Medium.

=head1 DESCRIPTION

BigMed::Janitor manages the maintenance routines that should be
run at regular intervals, including auto-publishing and auto-unpublishing of
content, updating the search index and building deferred pages.
Additional routines can be added as callbacks to any of the several
trigger hooks provided.

It's recommended that any apps using BigMed::Janitor first create
a BigMed::App subclass object before creating any BigMed::Janitor objects
to ensure that any plugins are properly loaded.

=head1 SYNOPSIS

    #add trigger callbacks
    BigMed::Janitor->add_trigger( 'before_all', \&coderef );
    BigMed::Janitor->add_trigger( 'before_site_build', \&coderef2 );
    BigMed::Janitor->add_trigger( 'after_site_build', \&coderef3 );
    BigMed::Janitor->add_trigger( 'after_all', \&coderef4 );

    #create a janitor object
    my $janitor = BigMed::Janitor->new();
    
    #run the routines (returns true value on success, undef on error)
    $janitor->do_maintenance() or $janitor->error_stop;

=head1 MAINTENANCE FLOW

The main event happens with BigMed::Janitor's C<do_maintenance> object
method, which carries out the following steps:

=over 4

=item 1. Run the callbacks registered for the C<before_all> trigger hook.

=item 2. Step through every site in the system and:

=over 4

=item a. Run the callbacks registered for the C<before_site_build> trigger
hook. These callbacks may flag sections and pages to build, and detail pages
to delete. Any pages scheduled for indexing/de-indexing in the search
index via BigMed::Search::Scheduler are also indexed here.

=item b. Rebuild the pages flagged for building by C<before_site_build>
(if any).

=item c. Remove the detail pages flagged for removal by C<before_site_build>
(if any).

=item d. Run the callbacks registered for the C<after_site_build> trigger hook.

=back

=item 3. Run the callbacks registered for the C<after_all> trigger hook.

=back

The action here is dictated almost entirely by the registered callbacks.
The callbacks installed by default are:

=over 4

=item * A routine in the C<before_all> trigger hook that handles log
rotation.

=item * A routine in the C<before_all> trigger hook that removes obsolete
status-bar info files.

=item * A routine in the C<before_site_build> trigger hook that updates
content items marked for auto-publishing/unpublishing and flags them for
building/removal in steps 2b and 2c above.

=item * A routine in the C<after_all> trigger hook that cleans up
any file leftovers from batch-upload extractions for the media libraries.

=item * A routine in the C<after_all> trigger hook that starts a data backup.

=back

Other actions can be added by registering additional callbacks.

=head1 CALLBACK TRIGGER HOOKS

BigMed::Janitor has four different hooks in the C<do_maintenance> process
where you may register callbacks via the C<add_trigger> method.

These routines should return a true value on success and a false value on
error (they should ideally set a BigMed::Error error in the latter case).
If BigMed::Janitor encounters a false value, the C<do_maintenance>
routine will stop its chores and return an undefined value.
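
For example, a bare sketch of a callback that aborts maintenance when
some precondition fails (C<precondition_ok()> is a hypothetical check,
not part of Big Medium):

    BigMed::Janitor->add_trigger( 'before_all', sub {
        my ($janitor) = @_;

        #a false return value halts do_maintenance
        return precondition_ok() ? 1 : 0;
    } );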

=over 4

=item C<before_all>

This hook is called at the very start of the maintenance process. The callback
routines receive the BigMed::Janitor object as the only argument.

=item C<before_site_build>

This hook is called for each site in the system before any pages are
rebuilt/updated. In fact, it's this hook's callbacks that determine
which sections and pages should be updated, if any.

The callback routines receive two arguments: the BigMed::Janitor object
and the BigMed::Site object for the site currently being processed. These
callbacks can identify sections and pages to be rebuilt or detail pages
that should be removed using the C<add_sections_to_build>,
C<add_pages_to_build> and C<add_detail_to_remove> methods.

If no sections are marked to build, no pages will be rebuilt at all. If
sections are marked but no pages are marked, only section pages will
be rebuilt.
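
For example, a minimal callback sketch (the section ID used here is
purely illustrative):

    BigMed::Janitor->add_trigger( 'before_site_build', sub {
        my ( $janitor, $site ) = @_;

        #flag a section of this site for rebuilding; a real callback
        #would look up the section IDs it actually cares about
        $janitor->add_sections_to_build( 12 );
        return 1;    #true value lets do_maintenance continue
    } );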

=item C<before_index>

This hook is called for each site in the system before pages in that site
are added to or removed from the site's search index. It is not called,
however, if the site requires no index updates.

The callback routines receive three arguments: the BigMed::Janitor object,
the BigMed::Search object and the BigMed::Site object.

=item C<new_builder>

This hook is called for each site that requires a page build. The
callback routines receive two arguments: the BigMed::Janitor object
and the new BigMed::Builder object. (The maintenance routine, for
example, uses this hook to set a callback on each builder object
to ping the browser during long-running page builds, keeping the HTTP
connection alive.)

=item C<after_site_build>

This hook is called for each site in the system after the pages have
been updated. The callback routines receive three arguments:

=over 4

=item 1. The BigMed::Janitor object

=item 2. The BigMed::Site object

=item 3. The BigMed::Builder object that did the building (undefined
if no building was required).

=back
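
A minimal callback sketch showing the argument order (the body is
purely illustrative):

    BigMed::Janitor->add_trigger( 'after_site_build', sub {
        my ( $janitor, $site, $builder ) = @_;

        #$builder is undefined when no rebuild was needed for this site
        return 1;
    } );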

=item C<after_all>

This hook is called at the very end of the maintenance process. The callback
routines receive the BigMed::Janitor object as the only argument.

=item C<before_backup>

This hook is called before a backup is initiated. The callback routines
receive the BigMed::Janitor object and the BigMed::Backup object as
arguments.

=back

=head1 METHODS

=head2 C<< BigMed::Janitor->new() >>

The constructor method returns a BigMed::Janitor object.

    my $janitor = BigMed::Janitor->new();

=head2 C<< BigMed::Janitor->add_trigger( 'hook_name', \&coderef ) >>

Adds a callback to the trigger hook named in the first argument. If multiple
callbacks are registered for the same hook, they are run in the order
in which they are registered.

=head2 C<< $janitor->do_maintenance >>

Runs the maintenance routine and associated callbacks.

=head2 C<< $janitor->jnotes >>

Returns a hash representing any current BigMed::JanitorNotes with
maintenance instructions. When C<do_maintenance> is called, the janitor
object automatically collects and deletes any BigMed::JanitorNotes
in the system. The hash is organized by site and action:

    (   $site_id1 => {
            #actions with targets get target values in sorted array
            'build_overflow' => \@sec_ids,

            #actions without targets just get a true value
            'foobar' => 1,
        },
        $site_id2 => {... etc ...},
    );

=head2 C<< $janitor->add_sections_to_build( $id[, $id2, $id3 ...] ) >>

Accepts an array of BigMed::Section object IDs and marks those sections
for building during maintenance. These values are reset after the
C<after_site_build> hook, so this method is really useful only to the
C<before_site_build> callback routine.

Ignores zero, undefined or empty-string values.

=head2 C<< $janitor->add_pages_to_build( $id[, $id2, $id3 ...] ) >>

Accepts an array of BigMed::Content::Page object IDs and marks those pages
for building during maintenance. As with C<add_sections_to_build>, these
values are reset after the C<after_site_build> hook, so this method is
really useful only to the C<before_site_build> callback routines.

Ignores zero, undefined or empty-string values.
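
For example, flagging a published item's sections and its own page for
rebuilding, much as the default auto-publish routine does (C<$item> is
assumed to be a BigMed::Content::Page object):

    $janitor->add_sections_to_build( $item->sections );
    $janitor->add_pages_to_build( $item->id );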

=head2 C<< $janitor->add_detail_to_remove( $slug, \@section_ids ) >>

Marks a detail page to be removed from the published site.
Requires two arguments:

=over 4

=item 1. The slug name of the BigMed::Content::Page object whose detail page
should be removed.

=item 2. A reference to an array of BigMed::Section object IDs for the
sections in which the detail page should be removed.

=back

Like the other flagging methods, this one is useful only to the
C<before_site_build> callback routines.
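
For example (C<$page> is assumed to be a BigMed::Content::Page object
whose detail page should be removed):

    $janitor->add_detail_to_remove( $page->slug, [ $page->sections ] );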

=head2 C<< $janitor->sections_to_build >>

Returns a reference to an array of the section IDs marked to build, duplicates
removed and in no particular order. This information is reset after the
C<after_site_build> hook is called for each site.

=head2 C<< $janitor->pages_to_build >>

Returns a reference to an array of the BigMed::Content::Page IDs marked to
build, duplicates removed and in no particular order. This information is
reset after the C<after_site_build> hook is called for each site.

=head2 C<< $janitor->detail_to_remove >>

Returns a reference to an array of array references. Each inner reference
holds two elements: the slug name and the section array reference passed
in a call to C<add_detail_to_remove>.
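
For example, walking the flagged detail pages (a sketch of what the
maintenance routine itself does with this list):

    foreach my $rargs ( @{ $janitor->detail_to_remove } ) {
        my ( $slug, $rsection_ids ) = @{$rargs};
        #...remove the detail page for $slug in those sections
    }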

=head2 C<< $janitor->reset_build_info >>

Resets the C<sections_to_build>, C<pages_to_build> and C<detail_to_remove>
values for the object.

=head2 C<< $janitor->set_stash('keyword', $value) >>

Stores ("stashes") the value given in the second argument under the
keyword given in the first, for later retrieval via C<stash>. The stash
persists for the entire life of the object and is not reset during
C<do_maintenance> as the other getters/setters are.

=head2 C<< $janitor->stash('keyword') >>

Retrieves the value associated with the identifier argument from the
object's stash.
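
For example (the C<needs_ping> key is purely illustrative):

    $janitor->set_stash( needs_ping => 1 );
    my $needs_ping = $janitor->stash('needs_ping');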

=head1 AUTHOR & COPYRIGHTS

This module and all Big Medium modules are copyright Josh Clark
and Global Moxie. All rights reserved.

Use of this module and the Big Medium content
management system are governed by Global Moxie's software licenses
and may not be used outside of the terms and conditions outlined
there.

For more information, visit the Global Moxie website at
L<http://globalmoxie.com/>.

Big Medium and Global Moxie are service marks of Global Moxie
and Josh Clark. All rights reserved.

=cut
