Create a Spam Detection Telegram Bot using Alith

Telegram chats see a lot of spam messages. These scams collect personal information and put the community at risk.

I want the Telegram Bot to be able to detect spam in the group chats that I'm in. Let’s use Python for this.

import asyncio

from telegram import Update
from telegram.ext import (
    Application,
    MessageHandler,
    filters,
    CallbackContext,
)

from alith import Agent

# Agent used to classify messages. The preamble asks the model to reply with
# the literal word "true" (spam) or "false" (not spam); process_message reads
# that reply with a case-insensitive substring check.
agent = Agent(
    name="Telegram Bot Agent",
    model="gpt-4",
    preamble="""You are a spam detection bot. You will be given a message and your task is to determine if it is spam or not. 
    If you think the message is spam, you will return \"true\". If you think the message is not spam, you will return \"false\".""",
)

async def process_message(message_text, context: CallbackContext) -> None:
    """Ask the agent whether a message is spam and report the verdict to the admin.

    Args:
        message_text: Text or caption of the message to classify.
        context: Handler context; its ``bot`` is used to send the report.
    """
    # agent.prompt is a synchronous call. Running it in the default executor
    # keeps the event loop free to serve other updates while it is in flight.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, agent.prompt, message_text)

    # The preamble asks for a bare "true"/"false", so a case-insensitive
    # substring check is used to read the verdict.
    # NOTE(review): nothing in this function stores the message; the "stored"
    # wording of the spam report is kept unchanged.
    if "true" in response.lower():
        verdict = "is spam and stored."
    else:
        verdict = "is not spam."

    await context.bot.send_message(
        chat_id=ADMIN_TELEGRAM_USER_ID,
        text=f"\"{message_text}\" {verdict}",
    )

async def handle_group_message(update: Update, context: CallbackContext) -> None:
    """Extract the text of a group message and pass it to the spam check.

    Args:
        update: Incoming Telegram update.
        context: Handler context, passed through to ``process_message``.
    """
    # update.message is None for edited messages, which MessageHandler still
    # dispatches; effective_message covers those updates instead of crashing
    # on an attribute access.
    message = update.effective_message
    if message is None:
        return

    # Prefer caption if present (common for forwarded/channel messages), else text
    message_text = message.caption or message.text
    if not message_text:
        return

    await process_message(message_text, context)


# Build the bot application. TELEGRAM_BOT_TOKEN must be defined before this line.
app = Application.builder().token(TELEGRAM_BOT_TOKEN).build()

# Route group messages to the spam check. filters.CAPTION is included because
# handle_group_message reads captions, and filters.TEXT does not match media
# messages that only carry a caption.
app.add_handler(
    MessageHandler(
        (filters.TEXT | filters.CAPTION | filters.FORWARDED) & filters.ChatType.GROUPS,
        handle_group_message,
    )
)


if __name__ == "__main__":
    # Start polling Telegram for updates.
    app.run_polling()

This code lets the bot run in a group chat and forward every message to the Alith agent for evaluation. Let's see an example.

An Example Spam Message

Now, it does a good job for the most part, but some responses might be inaccurate. We want to give the AI as much information about its task as possible. To do this, we can use Retrieval-Augmented Generation (RAG) to ground the model in a dataset of past spam messages. Let’s add that feature.

from telegram import Update
from telegram.ext import (
    Application,
    MessageHandler,
    filters,
    CallbackContext,
)

- from alith import Agent
+ from alith import Agent, MilvusStore, chunk_text

agent = Agent(
    name="Telegram Bot Agent",
    model="gpt-4",
    preamble="""You are a spam detection bot. You will be given a message and your task is to determine if it is spam or not. 
    If you think the message is spam, you will return \"true\". If you think the message is not spam, you will return \"false\".""",
+   store=MilvusStore(),    
)

+ def store_message_in_vector_store(message_text):
+     chunks = chunk_text(message_text, overlap_percent=0.2)
+     agent.store.save_docs(chunks)
+
+ async def handle_message(update: Update, context: CallbackContext) -> None:
+     message = update.message
+
+     # Prefer caption if present (common for forwarded/channel messages), else text
+     message_text = None
+     if message.caption:
+         message_text = message.caption
+     elif message.text:
+         message_text = message.text
+
+     # Only process if there is text/caption
+     if message_text:
+         store_message_in_vector_store(message_text)
+         await context.bot.send_message(chat_id=ADMIN_TELEGRAM_USER_ID, text=f"\"{message_text}\" is stored.")

async def process_message(message_text, context: CallbackContext) -> None:
    response = agent.prompt(message_text)
    if "true" in response.lower():
+       # Store the spam message in the dataset
+       store_message_in_vector_store(message_text)
        await context.bot.send_message(chat_id=ADMIN_TELEGRAM_USER_ID, text=f"\"{message_text}\" is spam and stored.")
    else:
        await context.bot.send_message(chat_id=ADMIN_TELEGRAM_USER_ID, text=f"\"{message_text}\" is not spam.")

async def handle_group_message(update: Update, context: CallbackContext) -> None:
    message = update.message

    # Prefer caption if present (common for forwarded/channel messages), else text
    message_text = None
    if message.caption:
        message_text = message.caption
    elif message.text:
        message_text = message.text

    if not message_text:
        return
+   # Search for the message in the store
+   results = agent.store.search(message_text, 3, 0.8)
+   if results:
+       # If there is a match, forward to the agent
        await process_message(message_text, context)


app = Application.builder().token(TELEGRAM_BOT_TOKEN).build()
app.add_handler(MessageHandler((filters.TEXT | filters.FORWARDED) & filters.ChatType.GROUPS, handle_group_message))
+ app.add_handler(MessageHandler(filters.TEXT | filters.FORWARDED, handle_message))

if __name__ == "__main__":
    app.run_polling()
A spam message that is provided to the RAG

Besides giving the model more context, RAG also lets us pre-filter posts. An incoming message is sent to Alith only if it is similar to a spam message already in the store. This keeps Alith from processing every message; it only sees the ones that pass the spam-similarity filter.

An example message that is not processed by Alith

This was a simple example of creating a Telegram bot with RAG enabled. All code can be found here.