# Copyright 2002-2008 Josh Clark and Global Moxie, LLC. This code cannot be
# redistributed without permission from globalmoxie.com.  For more
# information, consult your Big Medium license.
#
# $Id: Email.pm 3043 2008-03-31 14:00:38Z josh $

package BigMed::Email;
use strict;
use warnings;
use Carp;
our @EXPORT_OK = qw(send_email);
use base 'Exporter';
use BigMed;
use BigMed::DiskUtil qw(bm_untaint_filepath);
use String::Multibyte;
use Email::Valid;

#IMPORTANT: Because of character encoding, we deal with byte codes not chars
use bytes;

my $utf8_obj = String::Multibyte->new('UTF8');

sub send_email {

    #deal only with bytes; all strings should be coming in as utf,
    #convert to plain bytes.
    my %msg = map { pack 'C*', unpack 'U0C*', (defined $_ ? $_ : q{}) } @_;

    my $message = _message_string(%msg);
    my $bigmed  = BigMed->bigmed;
    if ( my $sendmail = $bigmed->env('SENDMAILPATH') ) {
        return _send_with_sendmail( $sendmail, $msg{from}, $message );
    }
    elsif ( my $smtp_host = $bigmed->env('SMTPSERVER') ) {
        return _send_with_smtp( $smtp_host, $msg{from}, $msg{to}, $msg{cc},
            $message );
    }
    else {
        return $bigmed->set_error(
            head => 'EMAIL_ERR_HEAD_Could not send mail',
            text => ['EMAIL_ERR_TEXT_Email settings not configured', $!],
        );
    }
}

my $ALT_NL = "\n" eq "\012" ? "\015" : "\012";

sub _message_string {
    my %msg = @_;
    $msg{from} or croak '"from" address required to build email message';
    $msg{to}   or croak '"to" address required to build email message';
    $msg{body} or croak '"body" message required to build email message';

    #vet to/from/cc and extract just the valid address portion
    foreach my $addr_type (qw(from to cc)) {
        my $raw_address = $msg{$addr_type} || next;
        $msg{$addr_type} = Email::Valid->address($raw_address)
          or return BigMed->set_error(
            head => 'EMAIL_ERR_HEAD_Could not send mail',
            text =>
              ['EMAIL_ERR_TEXT_Invalid address', $addr_type, $raw_address],
          );
    }

    #build the to/from/cc headers
    foreach my $field qw(from_name to_name) {
        $msg{$field} ||= q{};
        $msg{$field} = substr( $msg{$field}, 0, 75 )
          if length $msg{$field} > 75;
        $msg{$field} =~ s/[^A-Za-z0-9_\-\%\.' \(\)\&]//msg;
        $msg{$field} =~ s/\A\s+//ms;
        $msg{$field} =~ s/\s+\z//ms;
    }
    my $from =
      $msg{from_name} ? qq{"$msg{from_name}" <$msg{from}>} : $msg{from};
    my $to = $msg{to_name} ? qq{"$msg{to_name}" <$msg{to}>} : $msg{to};

    #cleanup/encode subject line
    my $subject = $msg{subject} ||= q{};

    #probably overkill... but let's be sure
    foreach my $head ( $from, $to, $subject, $msg{cc} ) {
        $head = q{}, next if !defined $head;
        $head =~ s/[\s\015\012]+/ /msg;
        $head =~ s/\A\s+//ms;
        $head =~ s/\s+\z//ms;

        $head =~ s/bcc\:/xxx/msig;
        $head =~ s/Content\-Type\:/xxx/msig;
        $head =~ s/Mime\-Type\:/xxx/msig;
        $head =~ s/cc\:/xxx/msig;
        $head =~ s/to\:/xxx/msig;
    }
    $to .= "\nCc: $msg{cc}" if $msg{cc};
    $subject = _encode_qp_subject($subject);

#cleanup/encode message body; unix-only linebreaks
#updated my original, more naive version with this more robust version,
#including parsing of various unicode newline-ish characters:
#http://www.onlamp.com/pub/a/onlamp/2006/08/17/understanding-newlines.html
#but not for \x{2028}|\x{2029}, which clip parens under Perl 5.6
#and removed \x{0085}|\x{000C} too, since they croak under 5.6.0 without use utf8.
#just need to revisit the format
    my $body = $msg{body};
    $body =~ s/\015\012|$ALT_NL/\n/msgo;
    my $mime;
    if ( $body =~ /[^\x00-\x7f]/ms ) {    #non-ascii characters
        my $charset;
        ( $charset, $body ) = _convert_to_simplest_charset($body);
        ( $mime, $body ) = _encode_qp( $body, $charset );
    }
    if ( !$mime ) {    #make sure lines don't exceed max allowed;
                       # (encoded versions are already chopped)
        $body = _wrap_lines($body);
        $mime =
            qq{Mime-Version: 1.0\nContent-Transfer-Encoding: 7bit\n}
          . qq{Content-Type: text/plain; charset=US-ASCII; format=flowed\n};
    }

    return <<"__END_OF_MAIL__";
Return-Path: <$msg{from}>
From: $from
To: $to
Subject: $subject
$mime
$body

__END_OF_MAIL__
}

sub _send_with_sendmail {
    my ( $sendmail_taint, $from, $message ) = @_;
    my $sendmail = bm_untaint_filepath($sendmail_taint)
      or croak "sendmail path contains illegal characters: $sendmail_taint";

    #use forge switch if from address has simple characters
    my $forge = q{};
    if ( $from =~ /\A([A-Za-z0-9_\-\.\@]+)\Z/ms ) { $forge = "-f $1" }
    my $SENDMAIL_FH;
    open $SENDMAIL_FH, "| $sendmail -t -oi -oem $forge"
      or return BigMed->set_error(
        head => 'EMAIL_ERR_HEAD_Could not send mail',
        text => ['EMAIL_ERR_TEXT_Could not open sendmail', $!],
      );
    print {$SENDMAIL_FH} $message
      or return BigMed->set_error(
        head => 'EMAIL_ERR_HEAD_Could not send mail',
        text => ['EMAIL_ERR_TEXT_Could not print sendmail', $!],
      );
    if ( !(close $SENDMAIL_FH ) ) {
        my $err = $!
          || q{No error was specified, but the problem is probably an }
          . q{incorrect destination address. Please check the address }
          . q{and try again.};
        return BigMed->set_error(
            head => 'EMAIL_ERR_HEAD_Could not send mail',
            text => ['EMAIL_ERR_TEXT_Could not close sendmail', $err],
          );
    }
    return 1;
}

sub _send_with_smtp {
    my ( $smtp_host, $from, $to, $cc, $message ) = @_;

    require Net::SMTP;
    my $smtp = Net::SMTP->new($smtp_host)
      or return BigMed->set_error(
        head => 'EMAIL_ERR_HEAD_Could not send mail',
        text => 'EMAIL_ERR_TEXT_Could not connect to smtp',
      );

    $smtp->mail($from)
      or return BigMed->set_error(
        head => 'EMAIL_ERR_HEAD_Could not send mail',
        text => [
            'EMAIL_ERR_TEXT_smtp rejected address',
            'from', $from, $smtp->code() . ': ' . $smtp->message()
        ],
      );

    $smtp->to($to)
      or return BigMed->set_error(
        head => 'EMAIL_ERR_HEAD_Could not send mail',
        text => [
            'EMAIL_ERR_TEXT_smtp rejected address',
            'to', $to, $smtp->code() . ': ' . $smtp->message()
        ],
      );

    if ($cc) {
        $smtp->cc($cc)
          or return BigMed->set_error(
            head => 'EMAIL_ERR_HEAD_Could not send mail',
            text => [
                'EMAIL_ERR_TEXT_smtp rejected address',
                'cc', $cc, $smtp->code() . ': ' . $smtp->message()
            ],
          );
    }

    # Start the mail
    $smtp->data()
      or return BigMed->set_error(
        head => 'EMAIL_ERR_HEAD_Could not send mail',
        text => [
            'EMAIL_ERR_TEXT_smtp rejected data',
            $smtp->code() . ': ' . $smtp->message()
        ],
      );

    # Send the header and message
    $smtp->datasend($message)
      or return BigMed->set_error(
        head => 'EMAIL_ERR_HEAD_Could not send mail',
        text => [
            'EMAIL_ERR_TEXT_smtp rejected data',
            $smtp->code() . ': ' . $smtp->message()
        ],
      );

    # Send the termination string and close the connection
    $smtp->dataend()
      or return BigMed->set_error(
        head => 'EMAIL_ERR_HEAD_Could not send mail',
        text => [
            'EMAIL_ERR_TEXT_smtp rejected data',
            $smtp->code() . ': ' . $smtp->message()
        ],
      );
    $smtp->quit();
    return 1;
}

sub _encode_qp_subject {
    my $subject = shift;

    #don't do encoding if we can get by with latin1...
    my $charset;
    ( $charset, $subject ) = _convert_to_simplest_charset($subject);
    return $subject unless $subject =~ /[^\x00-\x7f]/ms;

    my ( $subj_mime, $text ) = _encode_qp( $subject, $charset, 1 );
    chomp $text;
    $text =~ s/ /=20/msg;    #no unencoded spaces, per RFC2047

    #RFC 2047 requires that all headers including encoded
    #words be no more than 76 characters and follow the format:
    # =?CHARSET?q?CONTENT?=
    my $leader     = "=?$charset?q?";
    my $max_length = 72 - length($leader) - length('subject: ');

    if ( length($text) > $max_length && $subj_mime ne q{} ) {

        #start over again, encoding characters one by one
        my @chars;
        if ( $charset eq 'utf-8' ) {    #multibyte characters
            @chars = $utf8_obj->strsplit( q{}, $subject );
        }
        else {                          #single-byte characters
            @chars = split( //m, $subject );
        }

        $text = q{};
        my $line = q{};
        while (@chars) {
            my $char = ( _encode_qp( shift(@chars), q{}, 1 ) )[1];
            chomp($char);
            if ( length( $line . $char ) > $max_length ) {
                $text .= "\n " if $text ne q{};
                $text .= "$leader$line?=";
                $line = q{};
            }
            $line .= $char;
        }

        #repeat once more to finish up the final line
        $text .= "\n " if $text ne q{};
        $text .= "$leader$line?=";

    }
    elsif ( $subj_mime ne q{} ) {    #one line of non-ascii
        $text = "$leader$text?=";
    }
    return $text;
}

sub _encode_qp {

    #   Borrowed much of this routine from the MIME::QuotedPrint
    #   routine by Gisle Aas. Mr. Aas makes his library available
    #   as free software. Copyright 1995-1997,2002-2003 Gisle Aas.
    #
    #   Input:
    #   my ($mime, $encoded) = _encode_qp($res, $charset, $encode_ascii);
    #   $res is text to encode.
    #   $charset is the character set of the text if known
    #   (gets included in the mime header that's generated)
    #   $encode_ascii encodes text whether or not it's ascii
    #   (necessary for encoded subject headers that include =
    #   or spaces)
    #
    #   Output:
    #   ($mime, $encoded)
    #   $mime is a mime header appropriate to include in email
    #   header (empty if plain ascii).
    #

    my ( $res, $charset, $encode_ascii ) = @_;
    my $eol  = "\n";
    my $mime = q{};

    #if it's plain ascii, no need to encode, unless specifically requested
    return ( $mime, $res )
      unless ( $res =~ /[^\x00-\x7f]/m || $encode_ascii );

    if ( ord('A') == 193 ) {    # on EBCDIC machines we need translation help
        require Encode;
    }

# Do not mention ranges such as $res =~ s/([^ \t\n!-<>-~])/sprintf("=%02X", ord($1))/eg;
# since that will not even compile on an EBCDIC machine (where ord('!') > ord('<')).
    if ( ord('A') == 193 ) {    # EBCDIC style machine
        if ( ord('[') == 173 ) {
            $res
              =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp1047',$1))))/eg
              ;                 # rule #2,#3
            $res =~ s/([ \t]+)$/
              join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp1047',$_)))) }
                   split('', $1)
              )/egm;            # rule #3 (encode whitespace at eol)
        }
        elsif ( ord('[') == 187 ) {
            $res
              =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('posix-bc',$1))))/eg
              ;                 # rule #2,#3
            $res =~ s/([ \t]+)$/
              join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('posix-bc',$_)))) }
                   split('', $1)
              )/egm;            # rule #3 (encode whitespace at eol)
        }
        elsif ( ord('[') == 186 ) {
            $res
              =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp37',$1))))/eg
              ;                 # rule #2,#3
            $res =~ s/([ \t]+)$/
              join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp37',$_)))) }
                   split('', $1)
              )/egm;            # rule #3 (encode whitespace at eol)
        }
    }
    else {                      # ASCII style machine
        $res
          =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord($1))/eg
          ;                     # rule #2,#3
        $res =~ s/([ \t]+)$/
          join('', map { sprintf("=%02X", ord($_)) }
               split('', $1)
          )/egm;                # rule #3 (encode whitespace at eol)
    }

    # rule #5 (lines must be shorter than 76 chars, but we are not allowed
    # to break =XX escapes.  This makes things complicated :-( )
    my $brokenlines = q{};
    $brokenlines .= "$1=$eol" while $res =~ s/(.*?^[^\n]{73} (?:
         [^=\n]{2} (?! [^=\n]{0,1} $) # 75 not followed by .?\n
        |[^=\n]    (?! [^=\n]{0,2} $) # 74 not followed by .?.?\n
        |          (?! [^=\n]{0,3} $) # 73 not followed by .?.?.?\n
        ))//xsm;

    #prepare the mime info (must end in line break)
    $mime =
      qq~Mime-Version: 1.0\nContent-Type: text/plain; charset=$charset\nContent-Transfer-Encoding: quoted-printable\n~;

    return ( $mime, "$brokenlines$res" );

}    #end encode_qp routine

sub _convert_to_simplest_charset {

    # ($charset, $converted_text) = _convert_to_simplest_charset($text);
    # $charset is '' (ascii), 'iso-8859-1', or 'utf-8'

    my $convert = shift;
    return ( q{}, $convert ) unless $convert =~ /[^\x00-\x7f]/m;   #it's ascii

    #if not utf-8, assume it's a legacy iso-8859-1 encoding
    #(big medium used to deal only with latin 1
    if ( !$utf8_obj->islegal($convert) ) {
        return ( 'iso-8859-1', $convert );
    }

    #utf8 codes for latin1 characters plus microsoft chars
    my $latin1 = qq~
        [\x00-\x7f] |
        \xc2[\xa0-\xbf] |
        \xc3[\x80-\xbf] |
        \xe2\x82\xac    |   #euro symbol
        \xe2\x80\x93    |   #en-dash
        \xe2\x80\x94    |   #em-dash
        \xe2\x80\x98    |   #left single curly quote
        \xe2\x80\x99    |   #right single curly quote
        \xe2\x80\x9c    |   #left double curly quote
        \xe2\x80\x9d    |   #right double curly quote
        \xe2\x80\xa2    |   #bullet
        \xe2\x80\xa6    |   #ellipse
        \xe2\x80\xb9    |   #left single angle quote
        \xe2\x80\xba    |   #right single angle quote
        \xe2\x84\xa2    |   #trademark
        \xc5\xa0        |   #Capital Scaron
        \xc5\xa1        |   #LC Scaron
        \xc5\x92        |   #capital OE ligature
        \xc5\x93        |   #lc oe ligature
        \xc5\xb8        |   #Capital Y with umlaut
        \xcb\x9c        |   #tilde
    ~;

    if ( $convert !~ /^(?:$latin1)*$/xo ) {

        #it's got characters that don't exist in latin1, so stick with utf8
        return ( 'utf-8', $convert );
    }

    #it's utf8 but contains only ascii and latin1 chars.
    #convert to iso-8859-1 encoding. regex recipe from:
    #http://perldoc.perl.org/perluniintro.html#UNICODE-IN-OLDER-PERLS
    $convert
      =~ s/([\xC2\xC3])([\x80-\xBF])/chr(ord($1)<<6&0xC0|ord($2)&0x3F)/eg;

    #convert a few popular characters to plain-text versions to see if
    #we still might coax it into plain text
    if ( $convert =~ /\xe2|\xc5|\xcb/ms ) {
        $convert =~ s/\xe2\x82\xac/EURO/msg;
        $convert =~ s/\xe2\x80\x93/-/msg;
        $convert =~ s/\xe2\x80\x94/--/msg;
        $convert =~ s/\xe2\x80(\x98|\x99)/'/msg;
        $convert =~ s/\xe2\x80(\x9c|\x9d)/"/msg;
        $convert =~ s/\xe2\x80\xb9/</msg;
        $convert =~ s/\xe2\x80\xba/>/msg;
        $convert =~ s/\xe2\x80\xa2/*/msg;
        $convert =~ s/\xe2\x80\xa6/.../msg;
        $convert =~ s/\xe2\x84\xa2/(tm)/msg;
        $convert =~ s/\xc5\x92/OE/msg;
        $convert =~ s/\xc5\x93/oe/msg;
        $convert =~ s/\xc5\xa0/S/msg;
        $convert =~ s/\xc5\xa1/s/msg;
        $convert =~ s/\xc5\xb8/Y/msg;
        $convert =~ s/\xcb\x9c/~/msg;
    }

    my $charset = $convert =~ /[\x80-\xff]/ms ? 'iso-8859-1' : q{};
    return ( $charset, $convert );
}

sub _wrap_lines {

    #make sure lines are no longer than 72 chars and follow the conventions
    #described in RFC2646 for flowed text
    my $text   = shift || q{};
    my $MAX    = 71;
    my $result = q{};
    foreach my $graf ( split( /\n/, $text, -1 ) ) {   #respect all line breaks
        $graf =~ s/ +$//;    #rfc2646 requires spaces trimmed before linefeed
        if ( length($graf) < $MAX ) {
            $graf = q{ } . $graf if $graf =~ /^(>| |From)/;
            $result .= "$graf\n";
            next;
        }
        my @short_lines;
        my $line = q{};
        my @words = split( / /, $graf );
        while (@words) {
            my $word = shift @words;

            #follow rfc2646 space-stuffing convention for protected strings
            #(spaces, > and From)
            $word = q{ } . $word
              if $line eq q{} && ( $word eq q{} || $word =~ /^(>|From)/ );

            my $spc    = $line eq q{} ? 0 : 1;
            my $len    = length($line);
            my $newlen = $len + $spc + length($word);
            if ( $len == 0 && $newlen > $MAX ) {    #looooong word, allow
                $word .= q{ } if @words;    #more words follow this graf
                push @short_lines, $word;
                $line = q{};
            }
            elsif ( $len != 0 && $newlen > $MAX ) {    #hit boundary, do wrap
                push @short_lines, $line . q{ };
                if ( $word eq q{} || $word =~ /^(>|From)/ ) {
                    $word = q{ } . $word;
                }
                $line = $word;
            }
            else {    #still within line length, add space+word
                $line .= ( q{ } x $spc ) . $word;
            }
        }
        push @short_lines, $line if $line ne q{};
        $result .= join( "\n", @short_lines, q{} );
    }
    chop $result;
    return $result;
}

1;

__END__

=head1 NAME

BigMed::Email - Emailer for Big Medium

=head1 DESCRIPTION

BigMed::Email provides a C<send_email> routine that sends e-mail using
whatever method is indicated by the Big Medium system configuration.

=head1 SYNOPSIS

    use BigMed::Email qw(send_email)
    use BigMed::Error;
    
    send_email(
        to        => 'tony@bada-bing.com',
        to_name   => 'Tony Soprano',
        from      => 'meadow@columbia.edu',
        from_name => 'Meadow Soprano',
        cc        => 'carmela@i-heart-nj.com',
        subject   => 'The Stugots',
        body      => 'Dad, can I borrow the boat this weekend?',

    ) or BigMed::Error->error_stop;

=head1 USAGE

BigMed::Email optionally exports a single routine, C<send_email>:

=head2 C<send_email>

Sends an e-mail based on the parameters used, returning a true value if
successful, or undef if unsuccessful (an explanatory error message is also
added to the Big Medium error queue).

This is a simple mailer that is intended to be used to send to a single
address, along with an optional single CC address. The routine handles all
formatting and encoding of the subject and message body (e.g., lines are
chopped into appropriate lengths, non-ASCII text is converted to
quoted-printable, and ASCII text is formatted as flowed text).

All messages are sent as plain text. To provide maximum compatibility with
mail clients, the routine sends the US-ASCII character set when it can,
falling back to ISO-8859-1, and finally to UTF-8 if the message contains
text outside of the ISO-8859-1 character set.

The routine checks the format of the to, from and cc addresses and, if
any are not valid, the message will not be sent.

The routine accepts a hash of arguments, three of which are required:

=over 4

=item * C<< to => $recipient_address >>

The e-mail address of the recipient. (Must be just a single recipient).

=item * C<< from => $sender_address >>

The e-mail address of the sender.

=item * C<< body => $email_message >>

The message body of the e-mail. This text should be in the UTF-8 character
set but otherwise requires no special formatting.

=back

The routine also accepts the following optional arguments:

=over 4

=item * C<< subject => $subject_line >>

The message subject.

=item * C<< to_name => $recipient_name >>

The name of the recipient. This will be added to the "To" line of the email.

=item * C<< from_name => $sender_name >>

The name of the sender. This will be added to the "From" line of the email.

=item * C<< cc => $cc_address >>

A single e-mail address to which the message should be cc'd.

=back

=head1 AUTHOR & COPYRIGHTS

This module and all Big Medium modules are copyright Josh Clark
and Global Moxie. All rights reserved.

Use of this module and the Big Medium content
management system are governed by Global Moxie's software licenses
and may not be used outside of the terms and conditions outlined
there.

For more information, visit the Global Moxie website at
L<http://globalmoxie.com/>.

Big Medium and Global Moxie are service marks of Global Moxie
and Josh Clark. All rights reserved.

=cut

