[KinoSearch] [PATCH] find_sentence_boundaries
Father Chrysostomos
sprout at cpan.org
Tue Mar 11 11:57:52 PDT 2008
I found some bugs in
KinoSearch::Highlight::Highlighter::find_sentence_boundaries (entirely
my fault, since I wrote it :-) ). Here’s a patch that fixes them. It also
simplifies some of the code in create_excerpts.
-------------- next part --------------
Index: perl/t/303-highlighter.t
===================================================================
--- perl/t/303-highlighter.t (revision 3105)
+++ perl/t/303-highlighter.t (working copy)
@@ -19,7 +19,7 @@
package main;
-use Test::More tests => 10;
+use Test::More tests => 15;
binmode( STDOUT, ":utf8" );
@@ -132,3 +132,17 @@
)->create_excerpt( $hit ),
qr/strong/, "... but not another field"
);
+
+KinoSearch::Highlight::Highlighter->import('find_sentence_boundaries');
+my $sentences = 'This is a sentence. ' x 15;
+is_deeply [find_sentence_boundaries($sentences, 101, 150)], [120,140],
+ 'find_sentence_boundaries in list context with explicit args';
+is find_sentence_boundaries($sentences, 101, 150), 120,
+ 'fsb in scalar context with explicit args';
+is find_sentence_boundaries($sentences, 101, 105), undef,
+ 'fsb in scalar context with explicit args, finding nothing';
+is_deeply [find_sentence_boundaries($sentences)],
+ [0,20,40,60,80,100,120,140,160,180,200,220,240,260,280],
+ 'fsb in list context with one arg';
+is find_sentence_boundaries($sentences), 0,
+ 'fsb in scalar context with one arg';
Index: perl/lib/KinoSearch/Highlight/Highlighter.pm
===================================================================
--- perl/lib/KinoSearch/Highlight/Highlighter.pm (revision 3105)
+++ perl/lib/KinoSearch/Highlight/Highlighter.pm (working copy)
@@ -109,13 +109,9 @@
}
# ... otherwise ...
else {
- my $sentence_boundary = find_sentence_boundaries(
- $text, $top, $top + $excerpt_length
- );
- if( defined $sentence_boundary
- and
- $sentence_boundary - $top <= $limit
- ) {
+ if( defined ( my $sentence_boundary = find_sentence_boundaries(
+ $text, $top, $top + $limit
+ ) ) ) {
$top = $sentence_boundary;
$text = substr $text, $top;
}
@@ -216,7 +212,7 @@
# If $start is zero, then 0 is the first boundary
my @bounds;
- $start == 0 and wantarray || return(0), @bounds = 0;
+ $start == 0 and wantarray || (return 0), @bounds = 0;
my $substr = substr $text, $start, $end-$start;
@@ -224,13 +220,14 @@
while ($substr =~ /
(
.*?
- \.\s+
+ \.(?>\s+)
)
+ (?!\z)
/xsmg
)
{
- wantarray or return pos $substr;
- push @bounds, pos $substr;
+ wantarray or return $start + pos $substr;
+ push @bounds, $start + pos $substr;
}
return wantarray ? @bounds : undef;
-------------- next part --------------
More information about the kinosearch mailing list