fixed the bogocheck, added some more tests, and cleaned up buttest some more
This commit is contained in:
parent
403229b6f6
commit
976de46665
76
Butts.pm
76
Butts.pm
|
@ -50,7 +50,7 @@ sub new {
|
|||
return $self;
|
||||
}
|
||||
|
||||
# accessor for meme
|
||||
# accessors
|
||||
sub meme {
|
||||
my $self = shift;
|
||||
if (@_) {
|
||||
|
@ -59,6 +59,15 @@ sub meme {
|
|||
return $self->{meme}
|
||||
}
|
||||
|
||||
sub replace_freq {
|
||||
my $self = shift;
|
||||
if (@_) {
|
||||
$self->{replace_freq} = $_[0];
|
||||
}
|
||||
return $self->{replace_freq}
|
||||
}
|
||||
|
||||
# helpers
|
||||
sub is_stop_word {
|
||||
my ($self, $word) = @_;
|
||||
return exists $self->{stopwords}->{lc($word)};
|
||||
|
@ -112,48 +121,32 @@ sub buttify {
|
|||
return @words;
|
||||
}
|
||||
|
||||
$self->log("Word indices remaining: ",
|
||||
@word_idxs_len_sorted);
|
||||
|
||||
$self->log('Words in length order: '
|
||||
. join(', ', map { $words[$_] } @word_idxs_len_sorted));
|
||||
|
||||
my @idx_weights = _sq_weight_indices (scalar @word_idxs_len_sorted);
|
||||
$self->log('index1 weightings: ',
|
||||
join(", ", @idx_weights));
|
||||
|
||||
my ($xx_n, $xx_p, $xx_x) = setup_walker_rand(\@idx_weights);
|
||||
|
||||
# keep track of which we've done already so we can pick another.
|
||||
# there's probably a better way of doing this.
|
||||
my $words_butted = {};
|
||||
|
||||
# make sure we're not trying to butt too hard.
|
||||
if ($how_many_butts > @word_idxs_len_sorted) {
|
||||
$how_many_butts = scalar(@word_idxs_len_sorted);
|
||||
}
|
||||
|
||||
$self->log("buttifying with $how_many_butts repetitions");
|
||||
my $words_butted = {};
|
||||
|
||||
my ($non_butted_indices, $xx_n, $xx_p, $xx_x)
|
||||
= $self->_build_weighted_index(\@word_idxs_len_sorted, $words_butted);
|
||||
|
||||
for my $c (0 .. $how_many_butts-1) {
|
||||
|
||||
# Boooooooooogocheck. We really need non-replacement picks.
|
||||
my $idx_to_butt;
|
||||
my $iterations = 0;
|
||||
do {
|
||||
$iterations++;
|
||||
# break out if we've tried too much. Urgh.
|
||||
if ($iterations > 10) {
|
||||
return @words;
|
||||
}
|
||||
my $random_idx = get_walker_rand($xx_n, $xx_p, $xx_x);
|
||||
$idx_to_butt = $word_idxs_len_sorted[$random_idx];
|
||||
} until not exists($words_butted->{$idx_to_butt});
|
||||
my $random_idx = get_walker_rand($xx_n, $xx_p, $xx_x);
|
||||
my $idx_to_butt = $non_butted_indices->[$random_idx];
|
||||
|
||||
$self->log("Butting word idx: $idx_to_butt [",
|
||||
$words[$idx_to_butt], "]");
|
||||
|
||||
$words[$idx_to_butt]
|
||||
= $self->_buttsub($words[$idx_to_butt]);
|
||||
|
||||
$self->log("bogocheck took $iterations iteration" . ($iterations>1?'s':''));
|
||||
$self->log("Butting word idx: $idx_to_butt [", $words[$idx_to_butt], "]");
|
||||
$words[$idx_to_butt] = $self->_buttsub($words[$idx_to_butt]);
|
||||
$words_butted->{$idx_to_butt} = 1;
|
||||
($non_butted_indices, $xx_n, $xx_p, $xx_x)
|
||||
= $self->_build_weighted_index(\@word_idxs_len_sorted, $words_butted);
|
||||
|
||||
}
|
||||
|
||||
return @words;
|
||||
|
@ -195,7 +188,24 @@ sub _buttsub {
|
|||
}
|
||||
|
||||
substr($actual_word, $l, $r) = $butt;
|
||||
return "$lp$actual_word$rp";
|
||||
return $lp . $actual_word . $rp;
|
||||
}
|
||||
|
||||
sub _build_weighted_index {
|
||||
my $self = shift;
|
||||
my (@indices) = @{ shift() },
|
||||
my $butted_indices = shift;
|
||||
|
||||
$self->log("Word indices remaining: ", @indices);
|
||||
|
||||
my @non_butted_indices = grep { !exists ($butted_indices->{$_}) } @indices;
|
||||
my @idx_weights = _sq_weight_indices(scalar @non_butted_indices);
|
||||
|
||||
$self->log('index weightings: ',
|
||||
join(", ", @idx_weights));
|
||||
|
||||
my ($n, $p, $x) = setup_walker_rand(\@idx_weights);
|
||||
return (\@non_butted_indices, $n, $p, $x)
|
||||
}
|
||||
|
||||
sub _sq_weight_indices {
|
||||
|
|
16
buttest.pl
16
buttest.pl
|
@ -1,14 +1,20 @@
|
|||
#!/usr/bin/perl
|
||||
#!/usr/bin/env perl
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Butts;
|
||||
|
||||
my $butt = $ARGV[0] || "butt";
|
||||
my $buttifier = Butts->new(meme => $butt, debug => 1,
|
||||
replace_freq => $ARGV[1] // 0.5);
|
||||
my $butt = $ARGV[0] // "butt";
|
||||
my $replace_freq = $ARGV[1] // 0.5;
|
||||
my $debug = $ARGV[2] // 0;
|
||||
|
||||
my $buttifier = Butts->new(meme => $butt,
|
||||
debug => $debug,
|
||||
replace_freq => $replace_freq);
|
||||
|
||||
print STDERR "$butt repeat rate is " . $buttifier->replace_freq . $/
|
||||
if $debug;
|
||||
|
||||
print STDERR "butt repeat rate is " . $buttifier->{replace_freq} . $/;
|
||||
while(<STDIN>) {
|
||||
|
||||
# The old way
|
||||
|
|
21
t/Butts.t
21
t/Butts.t
|
@ -1,12 +1,12 @@
|
|||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Test::More tests => 25;
|
||||
use Test::More tests => 125;
|
||||
|
||||
BEGIN { use_ok('Butts'); }
|
||||
|
||||
my $meme = "butt";
|
||||
my $butter = Butts->new(meme => $meme);
|
||||
my $butter = Butts->new(meme => $meme, debug => 1);
|
||||
|
||||
isa_ok($butter, 'Butts', 'butter Object');
|
||||
can_ok($butter, qw(buttify buttify_string meme));
|
||||
|
@ -21,7 +21,7 @@ $butter->meme($meme);
|
|||
|
||||
my @buttify_data = qw(The rain in Spain falls mainly on the plain.);
|
||||
|
||||
for (1 .. 10) {
|
||||
for (1 .. 100) {
|
||||
my @output = $butter->buttify(@buttify_data);
|
||||
{
|
||||
local $" = ' ';
|
||||
|
@ -36,6 +36,17 @@ my $buttify_str_sample = "An idle hand is worth two in the bush\n";
|
|||
|
||||
for (1 .. 10) {
|
||||
my $output = $butter->buttify_string($buttify_str_sample);
|
||||
print $output, $/;
|
||||
like( $output, qr/\Q$meme\E/, 'buttify_string has butt');
|
||||
#print $output, $/;
|
||||
like( $output, qr/\Q$meme\E/i, 'buttify_string has butt');
|
||||
}
|
||||
|
||||
my $long_str_sample
|
||||
= "The alias-rejection-mixture method is a general and exact method for the "
|
||||
. "computer generation of random variables from an arbitrary discrete, "
|
||||
. "continuous, or mixed probability distribution.";
|
||||
|
||||
for (1..10) {
|
||||
my $output = $butter->buttify_string($long_str_sample);
|
||||
#print $output, $/;
|
||||
like( $output, qr/\Q$meme\E.*?\Q$meme\E/i, 'buttify_string has double-butt');
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue