Browse Source

add support for "/rndwisdom" command

It builds a pseudo-random sentence using a database of words and
word-pair frequencies collected from messages of admins and ex-admins
in the "Staff LCM" Telegram chat. A word that must appear in the
sentence can be specified as an argument.
bluehood 3 years ago
parent
commit
16b67fe01b
6 changed files with 35984 additions and 0 deletions
  1. 1 0
      LCMbot.py
  2. 7 0
      handlers.py
  3. 56 0
      speak.py
  4. 1565 0
      words_db/beginnings
  5. 2997 0
      words_db/ends
  6. 31358 0
      words_db/pairs

+ 1 - 0
LCMbot.py

@@ -16,6 +16,7 @@ def main():
     dp.add_handler(CommandHandler('vietnam', hnd.vietnam))
     dp.add_handler(CommandHandler('sellyourmother', hnd.sell_your_mother))
     dp.add_handler(CommandHandler('abuse150', hnd.abuse_150))
+    dp.add_handler(CommandHandler('rndwisdom', hnd.speak, pass_args=True))
 
     tale_handler = TaleHandler()
     dp.add_handler(CommandHandler('addatale', tale_handler.prompt_user))

+ 7 - 0
handlers.py

@@ -3,6 +3,7 @@ import subprocess as sp
 import httplib
 import numpy as np
 from insults import insults
+from speak import produce_sentence
 import logging
 # enable logging
 fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
@@ -59,3 +60,9 @@ def abuse_150(bot, update):
     """Verbally abuse incompetent LCM collaborators"""
     insult = np.random.choice(insults)
     update.message.reply_text(text=insult, quote=False)
+
+
def speak(bot, update, args):
    """Reply with a pseudo-random wise sentence.

    If the command carried arguments, the first one is forwarded to
    produce_sentence() as the word that must appear in the sentence.
    """
    topic = args[0] if args else None
    sentence = produce_sentence(topic)
    update.message.reply_text(text=sentence, quote=False)

+ 56 - 0
speak.py

@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+from sys import argv
+from random import random as rand
+
+
def produce_sentence(word=None):
    """Build a pseudo-random sentence from the word-frequency database.

    The databases in ``words_db/`` hold sentence beginnings (with their
    frequencies), sentence-ending words, and word-pair frequencies.

    Parameters
    ----------
    word : str or None
        A word that must appear in the sentence.  If the word is unknown
        to the database the sentence opens with a disclaimer instead.

    Returns
    -------
    str
        The generated sentence, terminated by a full stop.
    """
    # load beginnings, ends, and pairs of words
    begs = np.loadtxt('words_db/beginnings', dtype=[('w', 'S20'), ('p', 'f8')])
    ends = np.loadtxt('words_db/ends', usecols=(0,), dtype=str)
    pairs = np.loadtxt('words_db/pairs',
                       dtype=[('w1', 'S20'), ('w2', 'S20'), ('p', 'f8')])

    # normalise probabilities
    begs['p'] /= begs['p'].sum()

    if word is not None:
        word = str(word)  # dark ritual to fix issue with re-encoding unicode
        sentence = [word]
        if word not in begs['w'] and word not in pairs['w2']:
            # unknown word: disclaim it, then restart from a fresh beginning
            sentence = ['We', 'never', 'said', word + '.']
            word = np.random.choice(begs['w'], p=begs['p'])
            # BUGFIX: the fresh beginning must itself join the sentence,
            # otherwise the forward loop below continues from a word the
            # reader never sees (its successor had no visible predecessor)
            sentence.append(word)
        else:
            # build sentence backwards from word to a beginning;
            # 1 out of 4 times try to go backwards even if word is a beginning
            while sentence[0] not in begs['w'] \
                    or (rand() < 0.25 and sentence[0] in pairs['w2']):
                tmp_pairs = pairs[pairs['w2'] == sentence[0]]
                norm_probs = tmp_pairs['p'] / tmp_pairs['p'].sum()
                sentence.insert(0, np.random.choice(tmp_pairs['w1'],
                                p=norm_probs))
    else:
        # choose a beginning and start the sentence with it
        word = np.random.choice(begs['w'], p=begs['p'])
        sentence = [word]

    # extend the sentence until it is at least 15 words long AND the
    # current word is a known sentence-ending word (NOTE: the loop may
    # run past 15 words while searching for an ending word)
    while len(sentence) < 15 or word not in ends:
        while word not in pairs['w1']:
            # cannot continue from here: close the sentence and restart
            sentence[-1] += '.'
            word = np.random.choice(begs['w'], p=begs['p'])
            sentence.append(word)

        # pick the next word according to the pair frequencies
        tmp_pairs = pairs[pairs['w1'] == word]
        norm_probs = tmp_pairs['p'] / tmp_pairs['p'].sum()
        word = np.random.choice(tmp_pairs['w2'], p=norm_probs)
        sentence.append(word)

    return ' '.join(sentence) + '.'
+
+
+if __name__ == "__main__":
+    print produce_sentence(argv[1] if len(argv) > 1 else None)

File diff suppressed because it is too large
+ 1565 - 0
words_db/beginnings


File diff suppressed because it is too large
+ 2997 - 0
words_db/ends


File diff suppressed because it is too large
+ 31358 - 0
words_db/pairs