<?php
/*
 * A PHP4 port of John Gruber's SmartyPants plug-in for
 * Movable Type, BBEdit, Blosxom.
 *
 * SmartyPants is Copyright (c) 2003 John Gruber
 *
 *
 * SmartyPants-PHP ported February 2004 by matthew mcglynn
 *
 *
 * SmartyPants-PHP provides a function-based API to the
 * punctuation translation routines of SmartyPants 1.4.1.
 * Code specific to Movable Type, BBEdit, and Blosxom has
 * been removed as it is not useful in a PHP context.
 *
 * SmartyPants official home URL:
 *   http://daringfireball.net/projects/smartypants/
 *
 * SmartyPants-PHP official home URL:
 *   http://monauraljerk.org/smartypants-php/
 *
 * Comments from the original PERL code are retained below, marked with '#'
 * Comments specific to SmartyPants-PHP are marked '//'
 *
 */

# SmartyPants  -  A Plug-In for Movable Type, Blosxom, and BBEdit
# by John Gruber
# http://daringfireball.net
#
# See the readme or POD for details, installation instructions, and
# license information.
#
# Copyright (c) 2003 John Gruber
#

define('SMARTYPANTS_PHP_VERSION', '1.4.1php-r2');

// Control the operation of the SmartyPants() routine by passing
// any of the following command modes.  To combine modes, 'or' them
// together.
define('DO_QUOTES',                      1);
define('DO_BACKTICKS_DOUBLE',            2);
define('DO_BACKTICKS_ALL',               4);
define('DO_DASHES',                      8);
define('DO_OLDSCHOOL_DASHES',           16);
define('DO_INVERTED_OLDSCHOOL_DASHES',  32);
define('DO_ELLIPSES',                   64);
define('DO_QUOT_CONV',                 128);
// DO_STUPEFY needs a bit of its own: while it shared 128 with DO_QUOT_CONV
// the two modes could not be told apart in a combined $mode value.
define('DO_STUPEFY',                   256);

// Set default operational mode
define('DEFAULT_OPERATION_MODE', DO_QUOTES | DO_BACKTICKS_DOUBLE | DO_DASHES | DO_ELLIPSES);

// keys for $tokens hash
define('TOKENS_TYPE_TEXT', 'text');
define('TOKENS_TYPE_TAG',  'tag');

// punctuation inside these tags will not be modified
define('TAGS_TO_SKIP', "{<(/?)(?:pre|code|kbd|script)[\s>]}i");


// string SmartyPants ( string input [, int mode] )
// This function provides the main API to the library.  In most cases, you'll
// simply pass an HTML string to this function.
// The function will return a version of your input string, with
// punctuation made 'smart.'
//
//   $text   String to process; may contain HTML markup.
//   $mode   Command modes (the DO_* constants) 'or'ed together.
//
// Tags are copied to the output unchanged. Text that sits between an
// opening and a closing tag matched by TAGS_TO_SKIP is copied unchanged
// as well.
function SmartyPants($text = '', $mode = DEFAULT_OPERATION_MODE) {

    // quick return for empty string
    if ($text == '') {
        return '';
    }

    // translate the $mode bitmask into one boolean per command
    $do_quotes                    = ($mode & DO_QUOTES)                    ? TRUE : FALSE;
    $do_backticks_double          = ($mode & DO_BACKTICKS_DOUBLE)          ? TRUE : FALSE;
    $do_backticks_all             = ($mode & DO_BACKTICKS_ALL)             ? TRUE : FALSE;
    $do_dashes                    = ($mode & DO_DASHES)                    ? TRUE : FALSE;
    $do_oldschool_dashes          = ($mode & DO_OLDSCHOOL_DASHES)          ? TRUE : FALSE;
    $do_inverted_oldschool_dashes = ($mode & DO_INVERTED_OLDSCHOOL_DASHES) ? TRUE : FALSE;
    $do_ellipses                  = ($mode & DO_ELLIPSES)                  ? TRUE : FALSE;
    $convert_quot                 = ($mode & DO_QUOT_CONV)                 ? TRUE : FALSE;

    // Stupefy mode is only honoured when DO_STUPEFY has a bit of its own.
    // If it aliases DO_QUOT_CONV, a caller asking for &quot; conversion
    // would otherwise have every entity stupefied as well.
    $do_stupefy = (DO_STUPEFY != DO_QUOT_CONV && ($mode & DO_STUPEFY)) ? TRUE : FALSE;

    // tokenize input string -- break it into HTML tags and the text between them.
    $tokens = array();
    _tokenize($text, $tokens);

    $result = '';
    $in_pre = FALSE;  # Keep track of when we're inside <pre> or <code> tags.

    $prev_token_last_char = '';  # This is a cheat, used to get some context
                                 # for one-character tokens that consist of
                                 # just a quote char. What we do is remember
                                 # the last character of the previous text
                                 # token, to use as context to curl single-
                                 # character quote tokens correctly.

    foreach ($tokens as $data) {

        if ($data['type'] == TOKENS_TYPE_TAG) {
            # Don't mess with quotes inside tags.
            $result .= $data['body'];

            // An opening tag from TAGS_TO_SKIP switches processing off, the
            // matching closing tag (captured slash in $hits[1]) switches it
            // back on.
            if (preg_match(TAGS_TO_SKIP, $data['body'], $hits)) {
                $in_pre = ($hits[1] == '');
            }
            continue;
        }

        $t = $data['body'];
        $last_char = substr($t, -1); # Remember last char of this token before processing.

        if (! $in_pre) {

            $t = ProcessEscapes($t);

            if ($convert_quot) {
                // the search string must be the entity; searching for a
                // literal '"' would replace it with itself
                $t = str_replace('&quot;', '"', $t);
            }

            // the three dash styles are mutually exclusive; first one set wins
            if ($do_dashes) {
                $t = EducateDashes($t);
            }
            elseif ($do_oldschool_dashes) {
                $t = EducateDashesOldSchool($t);
            }
            elseif ($do_inverted_oldschool_dashes) {
                $t = EducateDashesOldSchoolInverted($t);
            }

            if ($do_ellipses) {
                $t = EducateEllipses($t);
            }

            # Note: backticks need to be processed before quotes.
            if ($do_backticks_double || $do_backticks_all) {
                $t = EducateBackticks($t);
            }
            if ($do_backticks_all) {
                $t = EducateSingleBackticks($t);
            }

            if ($do_quotes) {
                // a non-whitespace character right before a lone quote
                // means the quote closes; otherwise it opens
                $closing = preg_match("/\S/", $prev_token_last_char);

                if ($t == "'") {
                    # Special case: single-character ' token
                    $t = $closing ? '&#8217;' : '&#8216;';
                }
                elseif ($t == '"') {
                    # Special case: single-character " token
                    $t = $closing ? '&#8221;' : '&#8220;';
                }
                else {
                    $t = EducateQuotes($t);
                }
            }

            if ($do_stupefy) {
                $t = StupefyEntities($t);
            }
        }

        $prev_token_last_char = $last_char;
        $result .= $t;
    }

    return $result;
}



function EducateQuotes($s = '') {
#
#   Parameter:  String.
#
#   Returns:    The string, with "educated" curly quote HTML entities.
#
#   Example input:  "Isn't this fun?"
#   Example output: &#8220;Isn&#8217;t this fun?&#8221;
#
    // NOTE(review): the regex bodies in this function were lost from this
    // copy of the file (the heredocs had been stripped as if they were HTML
    // tags). They are rebuilt here from the comments and fragments that
    // survived, following the SmartyPants 1.4.1 rules -- confirm against
    // the upstream source.

    // quick return for empty string
    if ($s == '') {
        return '';
    }

    # Make our own "punctuation" character class, because the POSIX-style
    # [:PUNCT:] is only available in Perl 5.6 or later:

    // Original PERL:
    // my $punct_class = qr/[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]/;
    // For some reason, \[\\\] fails in PHP; must be \[\]\\
    $punct_class = '!"#$\%\'()*+,-.\/:;<=>?\@\[\]\\\\^_`{|}~';

    # Special case if the very first character is a quote
    # followed by punctuation at a non-word-break. Close the quotes by brute force:
    $s = preg_replace('/^\'(?=[' . $punct_class . ']\B)/', '&#8217;', $s);
    $s = preg_replace('/^"(?=[' . $punct_class . ']\B)/', '&#8221;', $s);

    # Special case for double sets of quotes, e.g.:
    #   He said, "'Quoted' words in a larger quote."
    $s = preg_replace('/"\'(?=\w)/', '&#8220;&#8216;', $s);
    $s = preg_replace('/\'"(?=\w)/', '&#8216;&#8220;', $s);

    # Special case for decade abbreviations (the '80s):
    $s = preg_replace('/\'(?=\d{2}s)/', '&#8217;', $s);

    // any character that may directly precede a closing quote
    $close_class = '[^ \t\r\n\[\{\(\-]';

    // what may directly precede an opening quote:
    //   a whitespace char, a non-breaking space entity, two dashes,
    //   a named dash entity, a decimal dash entity, or a hex one
    $open_context = '(\s|&nbsp;|--|&[mn]dash;|&#8211;|&#8212;|&#x201[34];)';

    # Get most opening single quotes:
    //   context, the quote, then a word character
    $s = preg_replace('/' . $open_context . '\'(?=\w)/', '$1&#8216;', $s);

    # Single closing quotes:
    //   If $1 captured, then do nothing; otherwise, positive lookahead for
    //   a whitespace char or an 's' at a word ending position. This is a
    //   special case to handle something like: "<i>Custer</i>'s Last Stand."
    $s = preg_replace('/(' . $close_class . ')?\'(?(1)|(?=\s|s\b))/i', '$1&#8217;', $s);

    # Any remaining single quotes should be opening ones:
    $s = str_replace("'", '&#8216;', $s);

    # Get most opening double quotes:
    $s = preg_replace('/' . $open_context . '"(?=\w)/', '$1&#8220;', $s);

    # Double closing quotes:
    //   If $1 captured, then do nothing; if not, then make sure the next
    //   char is whitespace.
    $s = preg_replace('/(' . $close_class . ')?"(?(1)|(?=\s))/', '$1&#8221;', $s);

    # Any remaining quotes should be opening ones.
    $s = str_replace('"', '&#8220;', $s);

    return $s;
}
function _tokenize($str, &$tokens) {
#
#   Parameters: $str    - string containing HTML markup.
#               $tokens - array, passed by reference, that receives the
#                         tokens comprising the input string.
#
#   Returns:    Nothing; the result is appended to $tokens. Each token is
#               either a tag (possibly with nested tags contained
#               therein), or a run of text between tags. Each element of
#               the array is a two-element array; 'type' is either
#               TOKENS_TYPE_TAG or TOKENS_TYPE_TEXT; 'body' is the actual
#               value.
#
#   Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#
    // NOTE(review): the function header and the comment alternative of the
    // regex were missing from this copy of the file. The header is inferred
    // from the call in SmartyPants(); the pattern follows the usual
    // SmartyPants tokenizer -- confirm against upstream.

    $len = strlen($str);

    // Build a pattern that accepts tags nested up to $depth levels deep:
    // open $depth groups, drop the dangling '|' of the innermost one,
    // then close them all again.
    $depth = 6;
    $nested_tags = str_repeat('(?:<(?:[^<>]|', $depth);
    $nested_tags = substr($nested_tags, 0, -1);
    $nested_tags .= str_repeat(')*>)', $depth);

    // alternatives: an SGML comment, a processing instruction, or a
    // (possibly nested) tag
    $match = "/(?s: <! (?: -- .*? -- \s* )+ > ) |
               (?s: <\? .*? \?> ) |
               $nested_tags/x";

    // $pos is the index of the first character not yet turned into a token.
    // Resuming the search there (rather than one character past the start
    // of the previous tag) keeps markup inside a comment or an outer tag
    // from being reported a second time.
    $pos = 0;
    while ($pos < $len && preg_match($match, $str, $hits, PREG_OFFSET_CAPTURE, $pos)) {

        $extracted_tag = $hits[0][0];   // contains the full HTML tag
        $tag_start = (int)$hits[0][1];  // position of the tag in the string

        // if this tag isn't next to the previous one, store the interstitial text
        if ($tag_start > $pos) {
            $tokens[] = array('type' => TOKENS_TYPE_TEXT,
                              'body' => substr($str, $pos, $tag_start - $pos));
        }

        $tokens[] = array('type' => TOKENS_TYPE_TAG,
                          'body' => $extracted_tag);

        $pos = $tag_start + strlen($extracted_tag);
    }

    // if text remains after the close of the last tag, grab it
    if ($pos < $len) {
        $tokens[] = array('type' => TOKENS_TYPE_TEXT,
                          'body' => substr($str, $pos));
    }

    return;

}

?>