280 lines
		
	
	
	
		
			6.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			280 lines
		
	
	
	
		
			6.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // vi:ts=4:sts=4:sw=4:noet:tw=72
 | |
| 
 | |
| package modules
 | |
| 
 | |
| // This Markov chain code is taken from the "Generating arbitrary text"
 | |
| // codewalk: http://golang.org/doc/codewalk/markov/
 | |
| //
 | |
| // Minor modifications have been made to make it easier to integrate
 | |
| // with a webserver and to save/load state
 | |
| 
 | |
| import (
 | |
| 	"bufio"
 | |
| 	"encoding/gob"
 | |
| 	"flag"
 | |
| 	"fmt"
 | |
| 	"math/rand"
 | |
| 	"os"
 | |
| 	"regexp"
 | |
| 	"strings"
 | |
| 	"sync"
 | |
| 	"time"
 | |
| 
 | |
| 	"git.dnix.de/an/xlog"
 | |
| )
 | |
| 
 | |
// Command-line flags configuring the markov module. They are registered
// on the default flag set when the package is initialised.

// markovPrefixLen is the prefix length handed to markovNewChain.
var markovPrefixLen = flag.Int("markov_prefix_len", 3, "markov: prefix len")

// markovAnswerLen is the word limit passed to MarkovChain.Generate.
var markovAnswerLen = flag.Int("markov_answer_len", 10, "markov: answer len")

// markovResponseChance is compared against a random percentage to decide
// whether a message gets an answer.
var markovResponseChance = flag.Int("markov_response_chance", 10, "markov: chance to get an answer (percent)")

// markovStateFile is the file the chain is loaded from and saved to.
var markovStateFile = flag.String("markov_state_file", "state.dat", "markov: state file")

// markovTrainFile is a text file fed into the chain at start-up; the
// value "-" disables training.
var markovTrainFile = flag.String("markov_train_file", "train.txt", "markov: training file")
 | |
| 
 | |
| var markovChain *MarkovChain
 | |
| 
 | |
| func init() {
 | |
| 	MsgFuncs["markov"] = markovHandleMessage
 | |
| 	RunFuncs["markov"] = markovRun
 | |
| }
 | |
| 
 | |
| func markovHandleMessage(m *Message) {
 | |
| 	text := m.Text
 | |
| 	if text == "" {
 | |
| 		return
 | |
| 	}
 | |
| 	text = markovParseText(text)
 | |
| 
 | |
| 	if rand.Intn(100) <= *markovResponseChance || strings.Index(text, BotNick) != -1 {
 | |
| 		responseText := markovChain.Generate(*markovAnswerLen, text)
 | |
| 		if responseText != "" {
 | |
| 			go func() {
 | |
| 				time.Sleep(time.Duration(rand.Intn(8)+2) * time.Second)
 | |
| 				SayCh <- m.Channel + "\n" + responseText
 | |
| 			}()
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	markovChain.Write(text)
 | |
| }
 | |
| 
 | |
| func markovRun() {
 | |
| 	markovChain = markovNewChain(*markovPrefixLen)
 | |
| 	err := markovChain.Load(*markovStateFile)
 | |
| 	if err != nil {
 | |
| 		xlog.Error(err.Error())
 | |
| 	}
 | |
| 	filepath := *markovTrainFile
 | |
| 	if filepath != "-" {
 | |
| 		file, _ := os.Open(filepath)
 | |
| 		scanner := bufio.NewScanner(file)
 | |
| 		for scanner.Scan() {
 | |
| 			text := scanner.Text()
 | |
| 			text = markovParseText(text)
 | |
| 			if text != "" {
 | |
| 				markovChain.Write(text)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	go func() {
 | |
| 		for {
 | |
| 			time.Sleep(60 * time.Second)
 | |
| 			markovChain.Save(*markovStateFile)
 | |
| 		}
 | |
| 	}()
 | |
| }
 | |
| 
 | |
// markovMessageRegex matches an angle-bracketed token such as
// <http://example.com> or <@U123|name> and captures its contents.
var markovMessageRegex = regexp.MustCompile(`<([^>]+)>`)

// markovTextReplacer decodes the HTML entities for '<', '>' and '&' and
// turns commas into spaces, all in a single pass.
var markovTextReplacer = strings.NewReplacer(
	"&lt;", "<",
	"&gt;", ">",
	"&amp;", "&",
	",", " ",
)

// markovParseText normalises a raw chat message for the Markov chain.
//
// Angle-bracketed tokens are rewritten according to their contents:
//
//	<http...>, <mailto...>  removed
//	<@U...|label>           replaced by "@label" (removed without a label)
//	<@name>                 replaced by "@name"
//	<#...|label>            replaced by "#label" (removed without a label)
//
// Any other bracketed token is left as it is. Afterwards surrounding
// white space is trimmed, the entities &lt;, &gt; and &amp; are decoded,
// commas become spaces, and the result is lower-cased.
func markovParseText(text string) string {
	for _, match := range markovMessageRegex.FindAllStringSubmatch(text, -1) {
		whole, inner := match[0], match[1]
		switch {
		case strings.HasPrefix(inner, "http"), strings.HasPrefix(inner, "mailto"):
			text = strings.Replace(text, whole, "", -1)
		case strings.HasPrefix(inner, "@U"):
			text = strings.Replace(text, whole, markovTokenLabel("@", inner), -1)
		case strings.HasPrefix(inner, "@"):
			text = strings.Replace(text, whole, inner, -1)
		case strings.HasPrefix(inner, "#"):
			text = strings.Replace(text, whole, markovTokenLabel("#", inner), -1)
		}
	}

	// Trimming happens before the comma replacement, so a trailing comma
	// still ends up as a trailing space, as it always has.
	text = strings.TrimSpace(text)
	// Entities are decoded only after the bracket handling above so that
	// a decoded "<...>" is never mistaken for a bracketed token.
	text = markovTextReplacer.Replace(text)

	return strings.ToLower(text)
}

// markovTokenLabel returns sigil followed by the part of inner after the
// first '|', or the empty string when inner contains no '|'.
func markovTokenLabel(sigil, inner string) string {
	parts := strings.SplitN(inner, "|", 2)
	if len(parts) != 2 {
		return ""
	}
	return sigil + parts[1]
}
 | |
| 
 | |
// MarkovPrefix is a Markov chain prefix of one or more words.
type MarkovPrefix []string

// String returns the prefix as a single string for use as a map key:
// the words joined with spaces, with leading and trailing spaces
// removed. A prefix that still has empty leading slots therefore yields
// only the words filled in so far.
func (p MarkovPrefix) String() string {
	return strings.Trim(strings.Join(p, " "), " ")
}

// Shift removes the first word from the prefix and appends the given
// word, modifying the prefix in place. On an empty prefix it does
// nothing, since there is no slot to store the word in.
func (p MarkovPrefix) Shift(word string) {
	if len(p) == 0 {
		return
	}
	copy(p, p[1:])
	p[len(p)-1] = word
}
 | |
| 
 | |
// MarkovChain maps prefixes to the suffixes that have been seen after
// them. A prefix key is a string of up to prefixLen words joined with
// spaces; a suffix is a single word, and one prefix may have many.
type MarkovChain struct {
	// MarkovChain holds the prefix -> suffixes table. It is the only
	// exported field.
	MarkovChain map[string][]string
	// prefixLen is the number of words per prefix used when writing.
	prefixLen int
	// mu is locked by the methods that touch the MarkovChain map.
	mu sync.Mutex
}

// markovNewChain returns a new, empty MarkovChain whose prefixes are
// prefixLen words long.
func markovNewChain(prefixLen int) *MarkovChain {
	chain := new(MarkovChain)
	chain.prefixLen = prefixLen
	chain.MarkovChain = map[string][]string{}
	return chain
}
 | |
| 
 | |
| // Write parses the bytes into prefixes and suffixes that are stored in MarkovChain.
 | |
| func (c *MarkovChain) Write(in string) (int, error) {
 | |
| 	in = strings.ToLower(in)
 | |
| 	if strings.HasPrefix(in, BotNick) {
 | |
| 		tok := strings.Split(in, " ")
 | |
| 		in = strings.Replace(in, tok[0]+" ", "", 1)
 | |
| 	}
 | |
| 	sr := strings.NewReader(in)
 | |
| 	p := make(MarkovPrefix, c.prefixLen)
 | |
| 	for {
 | |
| 		var s string
 | |
| 		if _, err := fmt.Fscan(sr, &s); err != nil {
 | |
| 			break
 | |
| 		}
 | |
| 		key := p.String()
 | |
| 		c.mu.Lock()
 | |
| 		c.MarkovChain[key] = append(c.MarkovChain[key], s)
 | |
| 		c.mu.Unlock()
 | |
| 		xlog.Debug("Chain len: %d, learned [%s] [%s]", len(c.MarkovChain), key, s)
 | |
| 		p.Shift(s)
 | |
| 	}
 | |
| 	return len(in), nil
 | |
| }
 | |
| 
 | |
| // Generate returns a string of at most n words generated from MarkovChain.
 | |
| func (c *MarkovChain) Generate(n int, in string) string {
 | |
| 	in = strings.ToLower(in)
 | |
| 	if strings.HasPrefix(in, BotNick) {
 | |
| 		tok := strings.Split(in, " ")
 | |
| 		in = strings.Replace(in, tok[0]+" ", "", 1)
 | |
| 	}
 | |
| 	c.mu.Lock()
 | |
| 	defer c.mu.Unlock()
 | |
| 	var p MarkovPrefix
 | |
| 	var words []string
 | |
| 	var start string
 | |
| 	for attempt := 0; attempt < 10; attempt++ {
 | |
| 		/*
 | |
| 			p = make(MarkovPrefix, c.prefixLen)
 | |
| 			p = strings.Split(in, " ")
 | |
| 			if len(p) > c.prefixLen {
 | |
| 				i := rand.Intn(len(p) - 2)
 | |
| 				p = p[i : i+c.prefixLen]
 | |
| 			}
 | |
| 		*/
 | |
| 		p = make(MarkovPrefix, 1)
 | |
| 		inWords := strings.Split(in, " ")
 | |
| 		start = inWords[rand.Intn(len(inWords))]
 | |
| 		p[0] = start
 | |
| 		//ss = p.String()
 | |
| 		xlog.Debug("Looking for answer on [%s]", start)
 | |
| 		for i := 0; i < n; i++ {
 | |
| 			choices := c.MarkovChain[p.String()]
 | |
| 			if len(choices) == 0 {
 | |
| 				break
 | |
| 			}
 | |
| 			next := choices[rand.Intn(len(choices))]
 | |
| 			words = append(words, next)
 | |
| 			if strings.HasSuffix(next, ".") || strings.HasSuffix(next, "!") || strings.HasSuffix(next, "?") {
 | |
| 				break
 | |
| 			}
 | |
| 			p.Shift(next)
 | |
| 		}
 | |
| 		if len(words) > 0 {
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| 	start = strings.Trim(start, " ")
 | |
| 	if len(words) == 0 {
 | |
| 		xlog.Debug("No answer found")
 | |
| 		return start + " ... pfrrrz"
 | |
| 	} else {
 | |
| 		xlog.Debug("Found words: [%s]", strings.Join(words, " "))
 | |
| 		return start + " " + strings.Join(words, " ")
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // Save the chain to a file
 | |
| func (c *MarkovChain) Save(fileName string) error {
 | |
| 	// Open the file for writing
 | |
| 	fo, err := os.Create(fileName)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	// close fo on exit and check for its returned error
 | |
| 	defer func() {
 | |
| 		if err := fo.Close(); err != nil {
 | |
| 			panic(err)
 | |
| 		}
 | |
| 	}()
 | |
| 
 | |
| 	// Create an encoder and dump to it
 | |
| 	c.mu.Lock()
 | |
| 	defer c.mu.Unlock()
 | |
| 
 | |
| 	enc := gob.NewEncoder(fo)
 | |
| 	err = enc.Encode(c)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Load the chain from a file
 | |
| func (c *MarkovChain) Load(fileName string) error {
 | |
| 	// Open the file for reading
 | |
| 	fi, err := os.Open(fileName)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	// close fi on exit and check for its returned error
 | |
| 	defer func() {
 | |
| 		if err := fi.Close(); err != nil {
 | |
| 			panic(err)
 | |
| 		}
 | |
| 	}()
 | |
| 
 | |
| 	// Create a decoder and read from it
 | |
| 	c.mu.Lock()
 | |
| 	defer c.mu.Unlock()
 | |
| 
 | |
| 	dec := gob.NewDecoder(fi)
 | |
| 	err = dec.Decode(c)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 |