new and improved Butts2.pm. Will replace Butts.pm once I fix everything else that uses the API. SVN branches confuse me :(
This commit is contained in:
parent
fadca1050c
commit
bd3c7f433f
|
@ -0,0 +1,252 @@
|
|||
package Butts2;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Math::Random;
|
||||
use TeX::Hyphen;
|
||||
use Carp;
|
||||
|
||||
use fields qw/replace_freq
|
||||
meme
|
||||
hyphen_file
|
||||
stopwords_file
|
||||
hyphenator
|
||||
stopwords
|
||||
debug/;
|
||||
|
||||
sub new {
|
||||
my Butts2 $self = shift;
|
||||
unless (ref $self) {
|
||||
$self = fields::new($self);
|
||||
}
|
||||
|
||||
my %args = (hyphen_file => 'hyphen.tex',
|
||||
stopwords_file => 'stopwords',
|
||||
debug => 0,
|
||||
meme => 'butt',
|
||||
replace_freq => (1/11),
|
||||
@_);
|
||||
|
||||
foreach my $key (keys %args) {
|
||||
$self->{$key} = $args{$key};
|
||||
}
|
||||
|
||||
$self->{hyphenator} = new TeX::Hyphen((file => $self->{hyphen_file}))
|
||||
or croak "Couldn't create TeX::Hyphen instance from " . $self->{hyphen_file};
|
||||
|
||||
my @stopwords;
|
||||
if (open my $sfh, $self->{stopwords_file}) {
|
||||
chomp(@stopwords = <$sfh>);
|
||||
close $sfh;
|
||||
} else {
|
||||
carp "Couldn't read stopwords file "
|
||||
. $self->{stopwords_file} . ' ' . $!;
|
||||
@stopwords = qw/a an and or but it in its It's it's the of you I i/;
|
||||
}
|
||||
|
||||
$self->{stopwords} = { map { lc($_) => 1 } @stopwords };
|
||||
|
||||
return $self;
|
||||
}
|
||||
|
||||
# accessor for meme
|
||||
sub meme {
|
||||
my $self = shift;
|
||||
if (@_) {
|
||||
$self->{meme} = $_[0];
|
||||
}
|
||||
return $self->{meme}
|
||||
}
|
||||
|
||||
sub is_stop_word {
|
||||
my ($self, $word) = @_;
|
||||
return exists $self->{stopwords}->{lc($word)};
|
||||
}
|
||||
|
||||
sub is_meme {
|
||||
my ($self, $word) = @_;
|
||||
return lc($word) eq lc($self->{meme});
|
||||
|
||||
}
|
||||
|
||||
sub buttify_string($_) {
|
||||
my $self = shift;
|
||||
# glom a string from $_ if we didn't get one passed.
|
||||
my $str = (@_ ? $_[0] : $_);
|
||||
chomp($str);
|
||||
my @words = split /\s+/, $str;
|
||||
return join(" ", $self->buttify(@words));
|
||||
}
|
||||
|
||||
sub buttify {
|
||||
my ($self, @words) = @_;
|
||||
my $how_many_butts = int(@words * $self->{replace_freq}) + 1;
|
||||
my $debug = $self->{debug};
|
||||
|
||||
# sort indices by word length
|
||||
my @word_idxs_len_sorted = do {
|
||||
my $c;
|
||||
|
||||
map { $_->[0] }
|
||||
sort { $b->[1] <=> $a->[1] }
|
||||
map { [$c++ , length($_) ] } @words;
|
||||
};
|
||||
|
||||
# remove stop words
|
||||
@word_idxs_len_sorted = grep {
|
||||
my $word = $words[$_];
|
||||
|
||||
my $is_word = $word !~ /^[\d\W+]+$/;
|
||||
my $is_stop = $self->is_stop_word($word);
|
||||
my $is_meme = $self->is_meme($word);
|
||||
|
||||
$is_word and not $is_stop and not $is_meme;
|
||||
} @word_idxs_len_sorted;
|
||||
|
||||
# bail out if we've got nothing left. This happens
|
||||
# when a string is comprised entirely of stop-words.
|
||||
unless (@word_idxs_len_sorted) {
|
||||
$self->log("Couldn't buttify ", join(' ', @words),
|
||||
": entirely stopwords");
|
||||
return @words;
|
||||
}
|
||||
|
||||
$self->log("Words remaining: ",
|
||||
@word_idxs_len_sorted);
|
||||
|
||||
$self->log('Words in length order: '
|
||||
. join(', ', map { $words[$_] } @word_idxs_len_sorted));
|
||||
|
||||
my @idx_weights = _sq_weight_indices (scalar @word_idxs_len_sorted);
|
||||
$self->log('index1 weightings: ',
|
||||
join(", ", @idx_weights));
|
||||
|
||||
my ($xx_n, $xx_p, $xx_x) = setup_walker_rand(\@idx_weights);
|
||||
|
||||
# keep track of which we've done already so we can pick another.
|
||||
# there's probably a better way of doing this.
|
||||
my $words_butted = {};
|
||||
$self->log("buttifying with $how_many_butts repetitions");
|
||||
for my $c (0 .. $how_many_butts-1) {
|
||||
|
||||
# Boooooooooogocheck. We really need non-replacement picks.
|
||||
my $idx_to_butt;
|
||||
do {
|
||||
my $random_idx = get_walker_rand($xx_n, $xx_p, $xx_x);
|
||||
$idx_to_butt = $word_idxs_len_sorted[$random_idx];
|
||||
} until not exists($words_butted->{$idx_to_butt});
|
||||
|
||||
$self->log("Butting word idx: $idx_to_butt [", $words[$idx_to_butt], "]");
|
||||
$words[$idx_to_butt] = $self->_buttsub($words[$idx_to_butt]);
|
||||
$words_butted->{$idx_to_butt} = 1;
|
||||
}
|
||||
|
||||
return @words;
|
||||
}
|
||||
|
||||
sub _buttsub {
|
||||
my ($self, $word) = @_;
|
||||
|
||||
my $meme = $self->{meme};
|
||||
|
||||
# split off leading and trailing punctuation
|
||||
my ($lp, $actual_word, $rp) = $word =~ /^([^A-Za-z]*)(.*?)([^A-Za-z]*)$/;
|
||||
|
||||
return $word unless $actual_word;
|
||||
|
||||
my @points = (0, $self->{hyphenator}->hyphenate($actual_word));
|
||||
|
||||
my $factor = 2;
|
||||
my $length = scalar @points;
|
||||
my $replace = $length - 1 - int(rand($length ** $factor) ** (1 / $factor));
|
||||
push @points, length($actual_word);
|
||||
|
||||
my $l = $points[$replace];
|
||||
my $r = $points[$replace + 1] - $l;
|
||||
|
||||
while (substr($actual_word, $l + $r, 1) eq 't') {
|
||||
$r++;
|
||||
}
|
||||
while ($l > 0 && substr($actual_word, $l - 1, 1) eq 'b') {
|
||||
$l--;
|
||||
}
|
||||
my $sub = substr($actual_word, $l, $r);
|
||||
my $butt = lc($meme);
|
||||
|
||||
if ($sub eq uc $sub) {
|
||||
$butt = uc($meme);
|
||||
} elsif ($sub =~/^[A-Z]/) {
|
||||
$butt = ucfirst($meme);
|
||||
}
|
||||
|
||||
substr($actual_word, $l, $r) = $butt;
|
||||
return "$lp$actual_word$rp";
|
||||
}
|
||||
|
||||
sub _sq_weight_indices {
|
||||
my $max = shift;
|
||||
return map { $max-- ** 2 } (0..$max-1);
|
||||
}
|
||||
|
||||
|
||||
# stealed frm http://code.activestate.com/recipes/576564/
|
||||
# and http://prxq.wordpress.com/2006/04/17/the-alias-method/
|
||||
# Copyright someone maybe somewhere?
|
||||
sub setup_walker_rand {
|
||||
my ($weight_ref) = @_;
|
||||
|
||||
my @weights = @$weight_ref;
|
||||
my $n = scalar @weights;
|
||||
my @in_x = (-1) x $n;
|
||||
my $sum_w = 0;
|
||||
$sum_w += $_ for @weights;
|
||||
|
||||
# normalise weights to have an average value of 1.
|
||||
@weights = map { $_ * $n / $sum_w } @weights;
|
||||
|
||||
my (@short, @long);
|
||||
my $i = 0;
|
||||
|
||||
# split into long and short groups (excluding those which == 1)
|
||||
for my $p (@weights) {
|
||||
if ($p < 1) {
|
||||
push @short, $i;
|
||||
} elsif ($p > 1) {
|
||||
push @long, $i;
|
||||
}
|
||||
$i++;
|
||||
}
|
||||
|
||||
# build alias map by combining short and long elements.
|
||||
while (scalar @short and scalar @long) {
|
||||
my $j = pop @short;
|
||||
my $k = $long[-1];
|
||||
|
||||
$in_x[$j] = $k;
|
||||
$weights[$k] -= (1 - $weights[$j]);
|
||||
|
||||
if ($weights[$k] < 1) {
|
||||
push @short, $k;
|
||||
pop @long;
|
||||
}
|
||||
# printf("test: j=%d k=%d pk=%.2f\n", $j, $k, $prob[$k]);
|
||||
}
|
||||
return ($n, \@weights, \@in_x)
|
||||
}
|
||||
|
||||
sub get_walker_rand {
|
||||
my ($n, $prob, $in_x) = @_;
|
||||
my ($u, $j);
|
||||
$u = random_uniform(1,0,1);
|
||||
$j = random_uniform_integer(1, 0, $n-1);
|
||||
return ($u <= $prob->[$j]) ? $j : $in_x->[$j];
|
||||
}
|
||||
|
||||
sub log {
|
||||
my ($self, @msg) = @_;
|
||||
if ($self->{debug}) {
|
||||
print join(" ", @msg) . $/;
|
||||
}
|
||||
}
|
||||
1;
|
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/perl
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Butts2;
|
||||
|
||||
my $butt = $ARGV[0] || "butt";
|
||||
my $buttifier = Butts2->new(meme => $butt, debug => 1,
|
||||
replace_freq => $ARGV[1] || 0.5);
|
||||
|
||||
while(<STDIN>) {
|
||||
|
||||
# The old way
|
||||
|
||||
# chomp
|
||||
# print join(" ", $buttifier->buttify(split(/\s+/, $_)))."\n";
|
||||
|
||||
# The new way
|
||||
|
||||
print $buttifier->buttify_string, $/;
|
||||
}
|
|
@ -28,7 +28,7 @@ sub init {
|
|||
my $self = shift;
|
||||
|
||||
$self->{settings}->{friends} = {};
|
||||
$self->{settings}->{friends} = {};
|
||||
$self->{settings}->{enemies} = {};
|
||||
|
||||
$self->load_config(0);
|
||||
|
||||
|
@ -185,8 +185,20 @@ sub kicked {
|
|||
return;
|
||||
}
|
||||
|
||||
# TODO: refactor these 3 better. Emote should never have to deal with commands
|
||||
# or prefixes. Just a message to be re-butted.
|
||||
sub emoted {
|
||||
my ($self, $ref) = @_;
|
||||
$self->handle_said_emoted($ref, 1);
|
||||
}
|
||||
|
||||
sub said {
|
||||
my ($self, $ref) = @_;
|
||||
$self->handle_said_emoted($ref, 0);
|
||||
}
|
||||
|
||||
sub handle_said_emoted {
|
||||
my ($self, $ref, $reply_as_emote) = @_;
|
||||
# slicin' ma hashes.
|
||||
my ($channel, $body, $address, $who) =
|
||||
@{$ref}{qw/channel body address who/};
|
||||
|
@ -214,7 +226,7 @@ sub said {
|
|||
$self->log("BUTT: Might butt\n");
|
||||
if ($self->to_butt_or_not_to_butt($who)) {
|
||||
$self->log("BUTT: Butting $who in [$channel]\n");
|
||||
$self->buttify_message($who, $channel, $body, 0);
|
||||
$self->buttify_message($who, $channel, $body, $reply_as_emote, 0);
|
||||
}
|
||||
|
||||
return;
|
||||
|
@ -318,7 +330,6 @@ sub handle_pm_command {
|
|||
}
|
||||
|
||||
} elsif ($cmd eq 'change-nick') {
|
||||
#TODO: this
|
||||
unless ($self->config_bool('changenick')) {
|
||||
$self->pm_reply($who, "Sorry, changing nicks is disabled.");
|
||||
return 1;
|
||||
|
@ -379,14 +390,17 @@ sub handle_channel_command {
|
|||
return 1;
|
||||
}
|
||||
|
||||
return 0; # TODO: unimplemented.
|
||||
return 0;
|
||||
|
||||
# TODO: !stopit - adds them to the enemies list.
|
||||
# TODO: !butt - randomly butts something?
|
||||
}
|
||||
|
||||
sub buttify_message {
|
||||
my ($self, $who, $where, $what, $prefix_addressee) = @_;
|
||||
my ($self, $who, $where,
|
||||
$what, $reply_as_emote,
|
||||
$prefix_addressee) = @_;
|
||||
|
||||
my $meme = $self->config('meme');
|
||||
|
||||
$prefix_addressee = 0 unless defined $prefix_addressee;
|
||||
|
@ -395,8 +409,13 @@ sub buttify_message {
|
|||
my @butted_bits = Butts::buttify($meme, @butt_bits);
|
||||
my $butt_msg = join ' ', @butted_bits;
|
||||
|
||||
$self->say(channel => $where, who => $who,
|
||||
body => $butt_msg, address => $prefix_addressee);
|
||||
if ($reply_as_emote) {
|
||||
$self->emote(channel => $where, who => $who,
|
||||
body => $butt_msg, address => 0);
|
||||
} else {
|
||||
$self->say(channel => $where, who => $who,
|
||||
body => $butt_msg, address => $prefix_addressee);
|
||||
}
|
||||
1;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,5 +17,5 @@ settings:
|
|||
friends: {shabble: 1, tef: 1 }
|
||||
enemies: {}
|
||||
frequency:
|
||||
friend: 10
|
||||
friend: 1
|
||||
normal: 3
|
||||
|
|
Loading…
Reference in New Issue