[KinoSearch] [PATCH] find_sentence_boundaries

Father Chrysostomos sprout at cpan.org
Tue Mar 11 11:57:52 PDT 2008


I found some bugs in
KinoSearch::Highlight::Highlighter::find_sentence_boundaries (entirely
my fault, since I wrote it :-) ). Here’s a patch to fix it up. It also
simplifies some of the code in create_excerpt.

-------------- next part --------------
Index: perl/t/303-highlighter.t
===================================================================
--- perl/t/303-highlighter.t	(revision 3105)
+++ perl/t/303-highlighter.t	(working copy)
@@ -19,7 +19,7 @@
 
 package main;
 
-use Test::More tests => 10;
+use Test::More tests => 15;
 
 binmode( STDOUT, ":utf8" );
 
@@ -132,3 +132,17 @@
     )->create_excerpt( $hit ),
     qr/strong/, "... but not another field"
 );
+
+KinoSearch::Highlight::Highlighter->import('find_sentence_boundaries');
+my $sentences = 'This is a sentence. ' x 15;
+is_deeply [find_sentence_boundaries($sentences, 101, 150)], [120,140],
+    'find_sentence_boundaries in list context with explicit args';
+is find_sentence_boundaries($sentences, 101, 150), 120,
+    'fsb in scalar context with explicit args';
+is find_sentence_boundaries($sentences, 101, 105), undef,
+    'fsb in scalar context with explicit args, finding nothing';
+is_deeply [find_sentence_boundaries($sentences)],
+    [0,20,40,60,80,100,120,140,160,180,200,220,240,260,280],
+    'fsb in list context with one arg';
+is find_sentence_boundaries($sentences), 0,
+    'fsb in scalar context with one arg';
Index: perl/lib/KinoSearch/Highlight/Highlighter.pm
===================================================================
--- perl/lib/KinoSearch/Highlight/Highlighter.pm	(revision 3105)
+++ perl/lib/KinoSearch/Highlight/Highlighter.pm	(working copy)
@@ -109,13 +109,9 @@
     }
     # ... otherwise ...
     else {
-        my $sentence_boundary = find_sentence_boundaries(
-            $text, $top, $top + $excerpt_length
-        );
-        if( defined $sentence_boundary
-              and
-            $sentence_boundary - $top <= $limit
-        ) {
+        if( defined ( my $sentence_boundary = find_sentence_boundaries(
+            $text, $top, $top + $limit
+        ) ) ) {
             $top = $sentence_boundary;
             $text = substr $text, $top;
         }
@@ -216,7 +212,7 @@
 
     # If $start is zero, then 0 is the first boundary
     my @bounds;
-    $start == 0 and wantarray || return(0), @bounds = 0;
+    $start == 0 and wantarray || (return 0), @bounds = 0;
 
     my $substr = substr $text, $start, $end-$start;
 
@@ -224,13 +220,14 @@
     while ($substr =~ /
             (
             .*?
-            \.\s+
+            \.(?>\s+)
             )
+            (?!\z)
             /xsmg
         )
     {
-        wantarray or return pos $substr;
-        push @bounds, pos $substr;
+        wantarray or return $start + pos $substr;
+        push @bounds, $start + pos $substr;
     }
 
     return wantarray ? @bounds : undef;
-------------- next part --------------





More information about the kinosearch mailing list