# speak.py (2.1 KB)
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. from sys import argv
  4. from random import random as rand
  5. def produce_sentence(word=None):
  6. # load beginnigs, ends, and pairs of words
  7. begs = np.loadtxt('words_db/beginnings', dtype=[('w','S20'), ('p','f8')])
  8. ends = np.loadtxt('words_db/ends', usecols=(0,), dtype=str)
  9. pairs = np.loadtxt('words_db/pairs',
  10. dtype=[('w1', 'S20'), ('w2', 'S20'), ('p','f8')])
  11. # normalise probabilities
  12. begs['p'] /= begs['p'].sum()
  13. if word is not None:
  14. word = str(word) # dark ritual to fix issue with re-encoding unicode
  15. sentence = [word]
  16. if word not in begs['w'] and word not in pairs['w2']:
  17. sentence = (['We', 'never', 'said', word + '.'])
  18. word = np.random.choice(begs['w'], p=begs['p'])
  19. else:
  20. # build sentence backwards from word to beginning
  21. # 1 out of 4 times try to go backwards even if word is a beginning
  22. while sentence[0] not in begs['w'] \
  23. or (rand() < 0.25 and sentence[0] in pairs['w2']):
  24. tmp_pairs = pairs[pairs['w2'] == sentence[0]]
  25. norm_probs = tmp_pairs['p'] / tmp_pairs['p'].sum()
  26. sentence.insert(0, np.random.choice(tmp_pairs['w1'],
  27. p=norm_probs))
  28. else:
  29. # choose a beginning and start sentence
  30. word = np.random.choice(begs['w'], p=begs['p'])
  31. sentence = [word]
  32. # create rest of the sentence
  33. # if sentence reaches 15 words length, just stop
  34. while len(sentence) < 15 or word not in ends:
  35. while word not in pairs['w1']:
  36. # cannot continue from here. let's start again
  37. sentence[-1] += '.'
  38. word = np.random.choice(begs['w'], p=begs['p'])
  39. sentence.append(word)
  40. # add word to sentence
  41. tmp_pairs = pairs[pairs['w1'] == word]
  42. norm_probs = tmp_pairs['p'] / tmp_pairs['p'].sum()
  43. word = np.random.choice(tmp_pairs['w2'], p=norm_probs)
  44. sentence.append(word)
  45. return ' '.join(sentence) + '.'
  46. if __name__ == "__main__":
  47. print produce_sentence(argv[1] if len(argv) > 1 else None)