# -*- coding: utf-8 -*-
# lcmstaff / LCMbot
import numpy as np
from sys import argv
from random import random as rand


def _load_words_db():
    """Load the three word tables from the ``words_db`` directory.

    Returns a tuple ``(begs, ends, pairs)``:
      * begs  -- structured array with fields 'w' (word) and 'p' (weight)
      * ends  -- 1-d array holding the first column of ``words_db/ends``
      * pairs -- structured array with fields 'w1', 'w2' and 'p' (weight)
    """
    if str is bytes:
        # Python 2: byte strings are the native string type
        text, extra = 'S20', {}
    else:
        # Python 3: 'S20' fields would hold bytes, which do not match a str
        # word and cannot be joined into the result; read text instead
        text, extra = 'U20', {'encoding': 'utf-8'}

    # ndmin=1 keeps single-line files as 1-d arrays instead of 0-d scalars
    begs = np.loadtxt('words_db/beginnings', ndmin=1,
                      dtype=[('w', text), ('p', 'f8')], **extra)
    ends = np.loadtxt('words_db/ends', usecols=(0,), dtype=str, ndmin=1,
                      **extra)
    pairs = np.loadtxt('words_db/pairs', ndmin=1,
                       dtype=[('w1', text), ('w2', text), ('p', 'f8')],
                       **extra)
    return begs, ends, pairs


def produce_sentence(word=None):
    """Build a random sentence from the word tables in ``words_db``.

    word -- optional word the sentence should contain.  When it is a known
            beginning or a known second word of a pair, the sentence is
            grown backwards from it to a beginning and then forwards.
            When it is unknown, the output starts with
            'We never said <word>.' followed by a freshly started sentence.
            When it is None, the sentence starts from a random beginning.

    Returns the chosen words joined by single spaces, terminated by '.'.
    """
    # load beginnings, ends, and pairs of words
    begs, ends, pairs = _load_words_db()

    # normalise probabilities
    begs['p'] /= begs['p'].sum()

    if word is not None:
        word = str(word)  # dark ritual to fix issue with re-encoding unicode
        if word not in begs['w'] and word not in pairs['w2']:
            # unknown word: say so, then open a new sentence.  The fresh
            # beginning must be appended too, otherwise the new sentence
            # would start at its second word.
            unknown = word
            word = np.random.choice(begs['w'], p=begs['p'])
            sentence = ['We', 'never', 'said', unknown + '.', word]
        else:
            sentence = [word]
            # build sentence backwards from word to beginning
            # 1 out of 4 times try to go backwards even if word is a beginning
            while sentence[0] not in begs['w'] \
                    or (rand() < 0.25 and sentence[0] in pairs['w2']):
                tmp_pairs = pairs[pairs['w2'] == sentence[0]]
                if len(tmp_pairs) == 0:
                    # nothing is known to precede this word; start the
                    # sentence here instead of sampling from an empty set
                    break
                norm_probs = tmp_pairs['p'] / tmp_pairs['p'].sum()
                sentence.insert(0, np.random.choice(tmp_pairs['w1'],
                                p=norm_probs))
    else:
        # choose a beginning and start sentence
        word = np.random.choice(begs['w'], p=begs['p'])
        sentence = [word]

    # create rest of the sentence
    # keep adding words until there are at least 15 of them and the last
    # one is a known ending
    while len(sentence) < 15 or word not in ends:
        while word not in pairs['w1']:
            # cannot continue from here. let's start again
            sentence[-1] += '.'
            word = np.random.choice(begs['w'], p=begs['p'])
            sentence.append(word)

        # add word to sentence
        tmp_pairs = pairs[pairs['w1'] == word]
        norm_probs = tmp_pairs['p'] / tmp_pairs['p'].sum()
        word = np.random.choice(tmp_pairs['w2'], p=norm_probs)
        sentence.append(word)

    return ' '.join(sentence) + '.'


if __name__ == "__main__":
    # Optional first command-line argument: a word the sentence should
    # contain.  The parenthesised single-argument print behaves the same
    # on Python 2 and Python 3.
    print(produce_sentence(argv[1] if len(argv) > 1 else None))