new and improved Butts2.pm. Will replace Butts.pm once I fix everything else that uses the API. SVN branches confuse me :(

This commit is contained in:
shabble 2009-10-28 15:37:12 +00:00
parent fadca1050c
commit bd3c7f433f
4 changed files with 300 additions and 8 deletions

252
Butts2.pm Normal file
View File

@ -0,0 +1,252 @@
package Butts2;
use strict;
use warnings;
use Math::Random;
use TeX::Hyphen;
use Carp;
use fields qw/replace_freq
meme
hyphen_file
stopwords_file
hyphenator
stopwords
debug/;
sub new {
my Butts2 $self = shift;
unless (ref $self) {
$self = fields::new($self);
}
my %args = (hyphen_file => 'hyphen.tex',
stopwords_file => 'stopwords',
debug => 0,
meme => 'butt',
replace_freq => (1/11),
@_);
foreach my $key (keys %args) {
$self->{$key} = $args{$key};
}
$self->{hyphenator} = new TeX::Hyphen((file => $self->{hyphen_file}))
or croak "Couldn't create TeX::Hyphen instance from " . $self->{hyphen_file};
my @stopwords;
if (open my $sfh, $self->{stopwords_file}) {
chomp(@stopwords = <$sfh>);
close $sfh;
} else {
carp "Couldn't read stopwords file "
. $self->{stopwords_file} . ' ' . $!;
@stopwords = qw/a an and or but it in its It's it's the of you I i/;
}
$self->{stopwords} = { map { lc($_) => 1 } @stopwords };
return $self;
}
# accessor for meme
sub meme {
my $self = shift;
if (@_) {
$self->{meme} = $_[0];
}
return $self->{meme}
}
sub is_stop_word {
my ($self, $word) = @_;
return exists $self->{stopwords}->{lc($word)};
}
sub is_meme {
my ($self, $word) = @_;
return lc($word) eq lc($self->{meme});
}
sub buttify_string($_) {
my $self = shift;
# glom a string from $_ if we didn't get one passed.
my $str = (@_ ? $_[0] : $_);
chomp($str);
my @words = split /\s+/, $str;
return join(" ", $self->buttify(@words));
}
sub buttify {
my ($self, @words) = @_;
my $how_many_butts = int(@words * $self->{replace_freq}) + 1;
my $debug = $self->{debug};
# sort indices by word length
my @word_idxs_len_sorted = do {
my $c;
map { $_->[0] }
sort { $b->[1] <=> $a->[1] }
map { [$c++ , length($_) ] } @words;
};
# remove stop words
@word_idxs_len_sorted = grep {
my $word = $words[$_];
my $is_word = $word !~ /^[\d\W+]+$/;
my $is_stop = $self->is_stop_word($word);
my $is_meme = $self->is_meme($word);
$is_word and not $is_stop and not $is_meme;
} @word_idxs_len_sorted;
# bail out if we've got nothing left. This happens
# when a string is comprised entirely of stop-words.
unless (@word_idxs_len_sorted) {
$self->log("Couldn't buttify ", join(' ', @words),
": entirely stopwords");
return @words;
}
$self->log("Words remaining: ",
@word_idxs_len_sorted);
$self->log('Words in length order: '
. join(', ', map { $words[$_] } @word_idxs_len_sorted));
my @idx_weights = _sq_weight_indices (scalar @word_idxs_len_sorted);
$self->log('index1 weightings: ',
join(", ", @idx_weights));
my ($xx_n, $xx_p, $xx_x) = setup_walker_rand(\@idx_weights);
# keep track of which we've done already so we can pick another.
# there's probably a better way of doing this.
my $words_butted = {};
$self->log("buttifying with $how_many_butts repetitions");
for my $c (0 .. $how_many_butts-1) {
# Boooooooooogocheck. We really need non-replacement picks.
my $idx_to_butt;
do {
my $random_idx = get_walker_rand($xx_n, $xx_p, $xx_x);
$idx_to_butt = $word_idxs_len_sorted[$random_idx];
} until not exists($words_butted->{$idx_to_butt});
$self->log("Butting word idx: $idx_to_butt [", $words[$idx_to_butt], "]");
$words[$idx_to_butt] = $self->_buttsub($words[$idx_to_butt]);
$words_butted->{$idx_to_butt} = 1;
}
return @words;
}
sub _buttsub {
my ($self, $word) = @_;
my $meme = $self->{meme};
# split off leading and trailing punctuation
my ($lp, $actual_word, $rp) = $word =~ /^([^A-Za-z]*)(.*?)([^A-Za-z]*)$/;
return $word unless $actual_word;
my @points = (0, $self->{hyphenator}->hyphenate($actual_word));
my $factor = 2;
my $length = scalar @points;
my $replace = $length - 1 - int(rand($length ** $factor) ** (1 / $factor));
push @points, length($actual_word);
my $l = $points[$replace];
my $r = $points[$replace + 1] - $l;
while (substr($actual_word, $l + $r, 1) eq 't') {
$r++;
}
while ($l > 0 && substr($actual_word, $l - 1, 1) eq 'b') {
$l--;
}
my $sub = substr($actual_word, $l, $r);
my $butt = lc($meme);
if ($sub eq uc $sub) {
$butt = uc($meme);
} elsif ($sub =~/^[A-Z]/) {
$butt = ucfirst($meme);
}
substr($actual_word, $l, $r) = $butt;
return "$lp$actual_word$rp";
}
sub _sq_weight_indices {
my $max = shift;
return map { $max-- ** 2 } (0..$max-1);
}
# stealed frm http://code.activestate.com/recipes/576564/
# and http://prxq.wordpress.com/2006/04/17/the-alias-method/
# Copyright someone maybe somewhere?
sub setup_walker_rand {
my ($weight_ref) = @_;
my @weights = @$weight_ref;
my $n = scalar @weights;
my @in_x = (-1) x $n;
my $sum_w = 0;
$sum_w += $_ for @weights;
# normalise weights to have an average value of 1.
@weights = map { $_ * $n / $sum_w } @weights;
my (@short, @long);
my $i = 0;
# split into long and short groups (excluding those which == 1)
for my $p (@weights) {
if ($p < 1) {
push @short, $i;
} elsif ($p > 1) {
push @long, $i;
}
$i++;
}
# build alias map by combining short and long elements.
while (scalar @short and scalar @long) {
my $j = pop @short;
my $k = $long[-1];
$in_x[$j] = $k;
$weights[$k] -= (1 - $weights[$j]);
if ($weights[$k] < 1) {
push @short, $k;
pop @long;
}
# printf("test: j=%d k=%d pk=%.2f\n", $j, $k, $prob[$k]);
}
return ($n, \@weights, \@in_x)
}
sub get_walker_rand {
my ($n, $prob, $in_x) = @_;
my ($u, $j);
$u = random_uniform(1,0,1);
$j = random_uniform_integer(1, 0, $n-1);
return ($u <= $prob->[$j]) ? $j : $in_x->[$j];
}
sub log {
my ($self, @msg) = @_;
if ($self->{debug}) {
print join(" ", @msg) . $/;
}
}
1;

21
buttest2.pl Executable file
View File

@ -0,0 +1,21 @@
#!/usr/bin/perl
use strict;
use warnings;
use Butts2;
my $butt = $ARGV[0] || "butt";
my $buttifier = Butts2->new(meme => $butt, debug => 1,
replace_freq => $ARGV[1] || 0.5);
while(<STDIN>) {
# The old way
# chomp
# print join(" ", $buttifier->buttify(split(/\s+/, $_)))."\n";
# The new way
print $buttifier->buttify_string, $/;
}

View File

@ -28,7 +28,7 @@ sub init {
my $self = shift;
$self->{settings}->{friends} = {};
$self->{settings}->{friends} = {};
$self->{settings}->{enemies} = {};
$self->load_config(0);
@ -185,8 +185,20 @@ sub kicked {
return;
}
# TODO: refactor these 3 better. Emote should never have to deal with commands
# or prefixes. Just a message to be re-butted.
sub emoted {
my ($self, $ref) = @_;
$self->handle_said_emoted($ref, 1);
}
sub said {
my ($self, $ref) = @_;
$self->handle_said_emoted($ref, 0);
}
sub handle_said_emoted {
my ($self, $ref, $reply_as_emote) = @_;
# slicin' ma hashes.
my ($channel, $body, $address, $who) =
@{$ref}{qw/channel body address who/};
@ -214,7 +226,7 @@ sub said {
$self->log("BUTT: Might butt\n");
if ($self->to_butt_or_not_to_butt($who)) {
$self->log("BUTT: Butting $who in [$channel]\n");
$self->buttify_message($who, $channel, $body, 0);
$self->buttify_message($who, $channel, $body, $reply_as_emote, 0);
}
return;
@ -318,7 +330,6 @@ sub handle_pm_command {
}
} elsif ($cmd eq 'change-nick') {
#TODO: this
unless ($self->config_bool('changenick')) {
$self->pm_reply($who, "Sorry, changing nicks is disabled.");
return 1;
@ -379,14 +390,17 @@ sub handle_channel_command {
return 1;
}
return 0; # TODO: unimplemented.
return 0;
# TODO: !stopit - adds them to the enemies list.
# TODO: !butt - randomly butts something?
}
sub buttify_message {
my ($self, $who, $where, $what, $prefix_addressee) = @_;
my ($self, $who, $where,
$what, $reply_as_emote,
$prefix_addressee) = @_;
my $meme = $self->config('meme');
$prefix_addressee = 0 unless defined $prefix_addressee;
@ -395,8 +409,13 @@ sub buttify_message {
my @butted_bits = Butts::buttify($meme, @butt_bits);
my $butt_msg = join ' ', @butted_bits;
$self->say(channel => $where, who => $who,
body => $butt_msg, address => $prefix_addressee);
if ($reply_as_emote) {
$self->emote(channel => $where, who => $who,
body => $butt_msg, address => 0);
} else {
$self->say(channel => $where, who => $who,
body => $butt_msg, address => $prefix_addressee);
}
1;
}

View File

@ -17,5 +17,5 @@ settings:
friends: {shabble: 1, tef: 1 }
enemies: {}
frequency:
friend: 10
friend: 1
normal: 3