From cbe213af656641e77c3f856f20acff2983202dfc Mon Sep 17 00:00:00 2001 From: Andreas Neue Date: Tue, 1 Nov 2016 18:34:05 +0100 Subject: [PATCH] Markov module --- main.go | 1 + modules/markov.go | 272 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 273 insertions(+) create mode 100644 modules/markov.go diff --git a/main.go b/main.go index d371ed1..a6cd38f 100644 --- a/main.go +++ b/main.go @@ -59,6 +59,7 @@ func main() { //TODO: implement more robust list parsing modules.Init(sayCh, *mods, *params) + modules.ModParams["_nick"] = *name go func() { for { diff --git a/modules/markov.go b/modules/markov.go new file mode 100644 index 0000000..b56d8d6 --- /dev/null +++ b/modules/markov.go @@ -0,0 +1,272 @@ +// vi:ts=4:sts=4:sw=4:noet:tw=72 + +package modules + +// This Markov chain code is taken from the "Generating arbitrary text" +// codewalk: http://golang.org/doc/codewalk/markov/ +// +// Minor modifications have been made to make it easier to integrate +// with a webserver and to save/load state + +import ( + "bufio" + "encoding/gob" + "fmt" + "math/rand" + "os" + "regexp" + "strconv" + "strings" + "sync" + "time" + + "code.dnix.de/an/xlog" + + "github.com/sorcix/irc" +) + +var markovChain *MarkovChain + +func init() { + MsgFuncs["markov"] = markovHandleMessage + RunFuncs["markov"] = markovRun +} + +func markovHandleMessage(m *irc.Message) { + text := m.Trailing + if text == "" { + return + } + + text = markovParseText(text) + + markovChain.Write(text) + + answerLen, _ := strconv.Atoi(ModParams["markov-answer-len"]) + respChance, _ := strconv.Atoi(ModParams["markov-response-chance"]) + if rand.Intn(100) <= respChance || strings.HasPrefix(text, ModParams["_nick"]) { + responseText := markovChain.Generate(answerLen, text) + if responseText != "" { + go func() { + time.Sleep(time.Duration(rand.Intn(8)+2) * time.Second) + SayCh <- "*\n" + responseText + }() + } + } +} + +func markovRun() { + prefixLen, _ := strconv.Atoi(ModParams["markov-prefix-len"]) + markovChain = markovNewChain(prefixLen) + //if *importFile != "" { + // StartImport(*importFile) + filepath := ModParams["markov-import-file"] + if filepath != "-" { + file, _ := os.Open(filepath) + scanner := bufio.NewScanner(file) + for scanner.Scan() { + text := scanner.Text() + text = markovParseText(text) + if text != "" { + markovChain.Write(text) + } + } + } else { + err := markovChain.Load(ModParams["markov-state-file"]) + if err != nil { + xlog.Error(err.Error()) + } + } + go func() { + for { + time.Sleep(60 * time.Second) + markovChain.Save(ModParams["markov-state-file"]) + } + }() + +} + +func markovParseText(text string) string { + messageRegex := regexp.MustCompile(`<([^>]+)>`) + matches := messageRegex.FindAllStringSubmatch(text, -1) + for _, matches2 := range matches { + + if strings.HasPrefix(matches2[1], "http") || strings.HasPrefix(matches2[1], "mailto") { + text = strings.Replace(text, matches2[0], "", -1) + + } else if strings.HasPrefix(matches2[1], "@U") { + parts := strings.SplitN(matches2[1], "|", 2) + + if len(parts) == 2 { + text = strings.Replace(text, matches2[0], "@"+parts[1], -1) + } else { + text = strings.Replace(text, matches2[0], "", -1) + } + + } else if strings.HasPrefix(matches2[1], "@") { + text = strings.Replace(text, matches2[0], matches2[1], -1) + + } else if strings.HasPrefix(matches2[1], "#") { + parts := strings.SplitN(matches2[1], "|", 2) + + if len(parts) == 2 { + text = strings.Replace(text, matches2[0], "#"+parts[1], -1) + } else { + text = strings.Replace(text, matches2[0], "", -1) + } + + } + } + + text = strings.TrimSpace(text) + + text = strings.Replace(text, "<", "<", -1) + text = strings.Replace(text, ">", ">", -1) + text = strings.Replace(text, "&", "&", -1) + + return text +} + +// Prefix is a Markov chain prefix of one or more words. +type MarkovPrefix []string + +// String returns the Prefix as a string (for use as a map key). +func (p MarkovPrefix) String() string { + return strings.Join(p, " ") +} + +// Shift removes the first word from the Prefix and appends the given word. +func (p MarkovPrefix) Shift(word string) { + copy(p, p[1:]) + p[len(p)-1] = word +} + +// MarkovChain contains a map ("chain") of prefixes to a list of suffixes. +// A prefix is a string of prefixLen words joined with spaces. +// A suffix is a single word. A prefix can have multiple suffixes. +type MarkovChain struct { + MarkovChain map[string][]string + prefixLen int + mu sync.Mutex +} + +// NewMarkovChain returns a new MarkovChain with prefixes of prefixLen words. +func markovNewChain(prefixLen int) *MarkovChain { + return &MarkovChain{ + MarkovChain: make(map[string][]string), + prefixLen: prefixLen, + } +} + +// Write parses the bytes into prefixes and suffixes that are stored in MarkovChain. +func (c *MarkovChain) Write(in string) (int, error) { + in = strings.ToLower(in) + sr := strings.NewReader(in) + p := make(MarkovPrefix, c.prefixLen) + for { + var s string + if _, err := fmt.Fscan(sr, &s); err != nil { + break + } + key := p.String() + c.mu.Lock() + c.MarkovChain[key] = append(c.MarkovChain[key], s) + c.mu.Unlock() + p.Shift(s) + } + return len(in), nil +} + +// Generate returns a string of at most n words generated from MarkovChain. +func (c *MarkovChain) Generate(n int, in string) string { + in = strings.ToLower(in) + c.mu.Lock() + defer c.mu.Unlock() + p := make(MarkovPrefix, c.prefixLen) + p = strings.Split(in, " ") + for { + if len(p) == c.prefixLen { + break + } + if len(p) < c.prefixLen { + p = append(p, "") + } + if len(p) > c.prefixLen { + if rand.Intn(2) == 0 { + p = p[1:] + } else { + p = p[0 : len(p)-1] + } + } + } + prefix := p.String() + var words []string + for i := 0; i < n; i++ { + choices := c.MarkovChain[p.String()] + if len(choices) == 0 { + break + } + next := choices[rand.Intn(len(choices))] + words = append(words, next) + p.Shift(next) + } + if len(words) == 0 { + return "" + } else { + return prefix + " " + strings.Join(words, " ") + } +} + +// Save the chain to a file +func (c *MarkovChain) Save(fileName string) error { + // Open the file for writing + fo, err := os.Create(fileName) + if err != nil { + return err + } + // close fo on exit and check for its returned error + defer func() { + if err := fo.Close(); err != nil { + panic(err) + } + }() + + // Create an encoder and dump to it + c.mu.Lock() + defer c.mu.Unlock() + + enc := gob.NewEncoder(fo) + err = enc.Encode(c) + if err != nil { + return err + } + + return nil +} + +// Load the chain from a file +func (c *MarkovChain) Load(fileName string) error { + // Open the file for reading + fi, err := os.Open(fileName) + if err != nil { + return err + } + // close fi on exit and check for its returned error + defer func() { + if err := fi.Close(); err != nil { + panic(err) + } + }() + + // Create a decoder and read from it + c.mu.Lock() + defer c.mu.Unlock() + + dec := gob.NewDecoder(fi) + err = dec.Decode(c) + if err != nil { + return err + } + + return nil +}