fixed the bogocheck, added some more tests, and cleaned up buttest some more

This commit is contained in:
shabble 2009-10-28 18:38:10 +00:00
parent 403229b6f6
commit 976de46665
3 changed files with 70 additions and 43 deletions

View File

@ -50,7 +50,7 @@ sub new {
return $self;
}
# accessor for meme
# accessors
sub meme {
my $self = shift;
if (@_) {
@ -59,6 +59,15 @@ sub meme {
return $self->{meme}
}
sub replace_freq {
my $self = shift;
if (@_) {
$self->{replace_freq} = $_[0];
}
return $self->{replace_freq}
}
# helpers
sub is_stop_word {
my ($self, $word) = @_;
return exists $self->{stopwords}->{lc($word)};
@ -112,48 +121,32 @@ sub buttify {
return @words;
}
$self->log("Word indices remaining: ",
@word_idxs_len_sorted);
$self->log('Words in length order: '
. join(', ', map { $words[$_] } @word_idxs_len_sorted));
my @idx_weights = _sq_weight_indices (scalar @word_idxs_len_sorted);
$self->log('index1 weightings: ',
join(", ", @idx_weights));
my ($xx_n, $xx_p, $xx_x) = setup_walker_rand(\@idx_weights);
# keep track of which we've done already so we can pick another.
# there's probably a better way of doing this.
my $words_butted = {};
# make sure we're not trying to butt too hard.
if ($how_many_butts > @word_idxs_len_sorted) {
$how_many_butts = scalar(@word_idxs_len_sorted);
}
$self->log("buttifying with $how_many_butts repetitions");
my $words_butted = {};
my ($non_butted_indices, $xx_n, $xx_p, $xx_x)
= $self->_build_weighted_index(\@word_idxs_len_sorted, $words_butted);
for my $c (0 .. $how_many_butts-1) {
# Boooooooooogocheck. We really need non-replacement picks.
my $idx_to_butt;
my $iterations = 0;
do {
$iterations++;
# break out if we've tried too much. Urgh.
if ($iterations > 10) {
return @words;
}
my $random_idx = get_walker_rand($xx_n, $xx_p, $xx_x);
$idx_to_butt = $word_idxs_len_sorted[$random_idx];
} until not exists($words_butted->{$idx_to_butt});
my $random_idx = get_walker_rand($xx_n, $xx_p, $xx_x);
my $idx_to_butt = $non_butted_indices->[$random_idx];
$self->log("Butting word idx: $idx_to_butt [",
$words[$idx_to_butt], "]");
$words[$idx_to_butt]
= $self->_buttsub($words[$idx_to_butt]);
$self->log("bogocheck took $iterations iteration" . ($iterations>1?'s':''));
$self->log("Butting word idx: $idx_to_butt [", $words[$idx_to_butt], "]");
$words[$idx_to_butt] = $self->_buttsub($words[$idx_to_butt]);
$words_butted->{$idx_to_butt} = 1;
($non_butted_indices, $xx_n, $xx_p, $xx_x)
= $self->_build_weighted_index(\@word_idxs_len_sorted, $words_butted);
}
return @words;
@ -195,7 +188,24 @@ sub _buttsub {
}
substr($actual_word, $l, $r) = $butt;
return "$lp$actual_word$rp";
return $lp . $actual_word . $rp;
}
sub _build_weighted_index {
my $self = shift;
my (@indices) = @{ shift() },
my $butted_indices = shift;
$self->log("Word indices remaining: ", @indices);
my @non_butted_indices = grep { !exists ($butted_indices->{$_}) } @indices;
my @idx_weights = _sq_weight_indices(scalar @non_butted_indices);
$self->log('index weightings: ',
join(", ", @idx_weights));
my ($n, $p, $x) = setup_walker_rand(\@idx_weights);
return (\@non_butted_indices, $n, $p, $x)
}
sub _sq_weight_indices {

View File

@ -1,14 +1,20 @@
#!/usr/bin/perl
#!/usr/bin/env perl
use strict;
use warnings;
use Butts;
my $butt = $ARGV[0] || "butt";
my $buttifier = Butts->new(meme => $butt, debug => 1,
replace_freq => $ARGV[1] // 0.5);
my $butt = $ARGV[0] // "butt";
my $replace_freq = $ARGV[1] // 0.5;
my $debug = $ARGV[2] // 0;
my $buttifier = Butts->new(meme => $butt,
debug => $debug,
replace_freq => $replace_freq);
print STDERR "$butt repeat rate is " . $buttifier->replace_freq . $/
if $debug;
print STDERR "butt repeat rate is " . $buttifier->{replace_freq} . $/;
while(<STDIN>) {
# The old way

View File

@ -1,12 +1,12 @@
use strict;
use warnings;
use Test::More tests => 25;
use Test::More tests => 125;
BEGIN { use_ok('Butts'); }
my $meme = "butt";
my $butter = Butts->new(meme => $meme);
my $butter = Butts->new(meme => $meme, debug => 1);
isa_ok($butter, 'Butts', 'butter Object');
can_ok($butter, qw(buttify buttify_string meme));
@ -21,7 +21,7 @@ $butter->meme($meme);
my @buttify_data = qw(The rain in Spain falls mainly on the plain.);
for (1 .. 10) {
for (1 .. 100) {
my @output = $butter->buttify(@buttify_data);
{
local $" = ' ';
@ -36,6 +36,17 @@ my $buttify_str_sample = "An idle hand is worth two in the bush\n";
for (1 .. 10) {
my $output = $butter->buttify_string($buttify_str_sample);
print $output, $/;
like( $output, qr/\Q$meme\E/, 'buttify_string has butt');
#print $output, $/;
like( $output, qr/\Q$meme\E/i, 'buttify_string has butt');
}
my $long_str_sample
= "The alias-rejection-mixture method is a general and exact method for the "
. "computer generation of random variables from an arbitrary discrete, "
. "continuous, or mixed probability distribution.";
for (1..10) {
my $output = $butter->buttify_string($long_str_sample);
#print $output, $/;
like( $output, qr/\Q$meme\E.*?\Q$meme\E/i, 'buttify_string has double-butt');
}