// vi:ts=4:sts=4:sw=4:noet:tw=72 package modules // This Markov chain code is taken from the "Generating arbitrary text" // codewalk: http://golang.org/doc/codewalk/markov/ // // Minor modifications have been made to make it easier to integrate // with a webserver and to save/load state import ( "bufio" "encoding/gob" "flag" "fmt" "math/rand" "os" "regexp" "strings" "sync" "time" "git.dnix.de/an/xlog" ) var ( markovPrefixLen = flag.Int("markov_prefix_len", 3, "markov: prefix len") markovAnswerLen = flag.Int("markov_answer_len", 10, "markov: answer len") markovResponseChance = flag.Int("markov_response_chance", 10, "markov: chance to get an answer (percent)") markovStateFile = flag.String("markov_state_file", "state.dat", "markov: state file") markovTrainFile = flag.String("markov_train_file", "train.txt", "markov: training file") ) var markovChain *MarkovChain func init() { MsgFuncs["markov"] = markovHandleMessage RunFuncs["markov"] = markovRun } func markovHandleMessage(m *Message) { if strings.HasPrefix(m.Text, "!") { return } tok := strings.Split(m.Text, " ") if tok[0] == "!talk" { if len(tok) < 2 { SayCh <- m.Channel + "\npfffrrz ... worüber denn?" } else { talk := tok[1] for { time.Sleep(time.Duration(rand.Intn(8)+2) * time.Second) talk = markovChain.Generate(*markovAnswerLen, talk) SayCh <- m.Channel + "\n" + talk if rand.Intn(100) < 10 { break } } } return } text := m.Text if text == "" { return } text = markovParseText(text) if rand.Intn(100) <= *markovResponseChance || strings.Index(text, BotNick) != -1 { responseText := markovChain.Generate(*markovAnswerLen, text) if responseText != "" { go func() { time.Sleep(time.Duration(rand.Intn(8)+2) * time.Second) SayCh <- m.Channel + "\n" + responseText }() } } if m.From != BotName { markovChain.Write(text) } } func markovRun() { markovChain = markovNewChain(*markovPrefixLen) err := markovChain.Load(*markovStateFile) if err != nil { xlog.Error(err.Error()) } filepath := *markovTrainFile if filepath != "-" { file, _ := os.Open(filepath) scanner := bufio.NewScanner(file) for scanner.Scan() { text := scanner.Text() text = markovParseText(text) if text != "" { markovChain.Write(text) } } } go func() { for { time.Sleep(60 * time.Second) markovChain.Save(*markovStateFile) } }() } func markovParseText(text string) string { messageRegex := regexp.MustCompile(`<([^>]+)>`) matches := messageRegex.FindAllStringSubmatch(text, -1) for _, matches2 := range matches { if strings.HasPrefix(matches2[1], "http") || strings.HasPrefix(matches2[1], "mailto") { text = strings.Replace(text, matches2[0], "", -1) } else if strings.HasPrefix(matches2[1], "@U") { parts := strings.SplitN(matches2[1], "|", 2) if len(parts) == 2 { text = strings.Replace(text, matches2[0], "@"+parts[1], -1) } else { text = strings.Replace(text, matches2[0], "", -1) } } else if strings.HasPrefix(matches2[1], "@") { text = strings.Replace(text, matches2[0], matches2[1], -1) } else if strings.HasPrefix(matches2[1], "#") { parts := strings.SplitN(matches2[1], "|", 2) if len(parts) == 2 { text = strings.Replace(text, matches2[0], "#"+parts[1], -1) } else { text = strings.Replace(text, matches2[0], "", -1) } } } text = strings.TrimSpace(text) text = strings.Replace(text, "<", "<", -1) text = strings.Replace(text, ">", ">", -1) text = strings.Replace(text, "&", "&", -1) //text = strings.Replace(text, ",", " ", -1) //reg := regexp.MustCompile("[^a-zA-Z0-9 ]+") //delText := reg.ReplaceAllString(text, "") return strings.ToLower(text) } // Prefix is a Markov chain prefix of one or more words. type MarkovPrefix []string // String returns the Prefix as a string (for use as a map key). func (p MarkovPrefix) String() string { return strings.Trim(strings.Join(p, " "), " ") } // Shift removes the first word from the Prefix and appends the given word. func (p MarkovPrefix) Shift(word string) { copy(p, p[1:]) p[len(p)-1] = word } // MarkovChain contains a map ("chain") of prefixes to a list of suffixes. // A prefix is a string of prefixLen words joined with spaces. // A suffix is a single word. A prefix can have multiple suffixes. type MarkovChain struct { MarkovChain map[string][]string prefixLen int mu sync.Mutex } // NewMarkovChain returns a new MarkovChain with prefixes of prefixLen words. func markovNewChain(prefixLen int) *MarkovChain { return &MarkovChain{ MarkovChain: make(map[string][]string), prefixLen: prefixLen, } } func markovContains(a []string, s string) bool { for _, v := range a { if v == s { return true } } return false } // Write parses the bytes into prefixes and suffixes that are stored in MarkovChain. func (c *MarkovChain) Write(in string) (int, error) { in = strings.ToLower(in) if strings.HasPrefix(in, BotNick) { tok := strings.Split(in, " ") in = strings.Replace(in, tok[0]+" ", "", 1) } sr := strings.NewReader(in) p := make(MarkovPrefix, c.prefixLen) c.mu.Lock() defer c.mu.Unlock() for { var s string if _, err := fmt.Fscan(sr, &s); err != nil { break } key := p.String() if !markovContains(c.MarkovChain[key], s) { c.MarkovChain[key] = append(c.MarkovChain[key], s) xlog.Debug("Chain len: %d, learned [%s] [%s]", len(c.MarkovChain), key, s) } else { xlog.Debug("Chain len: %d, [%s] [%s] is already known", len(c.MarkovChain), key, s) } p.Shift(s) } return len(in), nil } // Generate returns a string of at most n words generated from MarkovChain. func (c *MarkovChain) Generate(n int, in string) string { in = strings.ToLower(in) if strings.HasPrefix(in, BotNick) { tok := strings.Split(in, " ") in = strings.Replace(in, tok[0]+" ", "", 1) } c.mu.Lock() defer c.mu.Unlock() var p MarkovPrefix var words []string var start string for attempt := 0; attempt < 10; attempt++ { p = make(MarkovPrefix, c.prefixLen) inWords := strings.Split(in, " ") if attempt < 9 { start = inWords[rand.Intn(len(inWords))] } else { start = "" } p.Shift(start) xlog.Debug("Looking for answer on [%s]", start) for i := 0; i < n; i++ { choices := c.MarkovChain[p.String()] if len(choices) == 0 { break } next := choices[rand.Intn(len(choices))] words = append(words, next) if strings.HasSuffix(next, ".") || strings.HasSuffix(next, "!") || strings.HasSuffix(next, "?") { break } p.Shift(next) } if len(words) > 0 { break } } start = strings.Trim(start, " ") return start + " " + strings.Join(words, " ") } // Save the chain to a file func (c *MarkovChain) Save(fileName string) error { // Open the file for writing fo, err := os.Create(fileName) if err != nil { return err } // close fo on exit and check for its returned error defer func() { if err := fo.Close(); err != nil { panic(err) } }() // Create an encoder and dump to it c.mu.Lock() defer c.mu.Unlock() enc := gob.NewEncoder(fo) err = enc.Encode(c) if err != nil { return err } return nil } // Load the chain from a file func (c *MarkovChain) Load(fileName string) error { // Open the file for reading fi, err := os.Open(fileName) if err != nil { return err } // close fi on exit and check for its returned error defer func() { if err := fi.Close(); err != nil { panic(err) } }() // Create a decoder and read from it c.mu.Lock() defer c.mu.Unlock() dec := gob.NewDecoder(fi) err = dec.Decode(c) if err != nil { return err } return nil }