"""
**BeamerBot Module**
--------------------
The `BeamerBot` module provides a framework for generating structured LaTeX Beamer slides based on lesson objectives, readings, and prior lesson presentations. By using a language model (LLM), `BeamerBot` automates the process of slide creation, ensuring a consistent slide structure while allowing for custom guidance and validation.
Key Functionalities
~~~~~~~~~~~~~~~~~~~
1. **Automated Slide Generation**:
- `BeamerBot` generates a LaTeX Beamer presentation for each lesson, incorporating:
- A title page with consistent author and institution information
- "Where We Came From" and "Where We Are Going" slides
- Lesson objectives with highlighted action verbs (e.g., `\\textbf{Analyze} key events`)
- Discussion questions and in-class exercises
- Summary slides with key takeaways
2. **Previous Lesson Integration**:
- Retrieves and references prior lesson presentations to maintain consistent formatting and flow
- Preserves author and institution information across presentations
3. **Prompt Customization and Validation**:
- Supports custom prompts and specific guidance for tailored slide content
- Validates generated LaTeX for correct formatting and content quality
- Provides multiple retry attempts if validation fails
Dependencies
~~~~~~~~~~~~~
This module requires:
- `langchain_core`: For LLM chain creation and prompt handling
- `pathlib`: For file path management
- Custom utility modules for:
- Document loading (`load_documents`)
- LaTeX validation (`llm_validator`)
- Response parsing (`response_parsers`)
- Slide pipeline utilities (`slide_pipeline_utils`)
Usage
~~~~~~~
1. **Initialize BeamerBot**:
```python
beamer_bot = BeamerBot(
lesson_no=10,
llm=llm,
course_name="Political Science",
lesson_loader=lesson_loader,
output_dir=output_dir
)
```
2. **Generate Slides**:
```python
# Optional specific guidance
guidance = "Focus on comparing democratic and authoritarian systems"
slides = beamer_bot.generate_slides(specific_guidance=guidance)
```
3. **Save the Slides**:
```python
beamer_bot.save_slides(slides)
```
"""
import logging
from pathlib import Path
from typing import Any, Dict, Union
# env setup
from langchain_core.messages import SystemMessage
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import (ChatPromptTemplate,
HumanMessagePromptTemplate)
# base libraries
from class_factory.beamer_bot.slide_preamble import preamble
from class_factory.utils.base_model import BaseModel
from class_factory.utils.llm_validator import Validator
from class_factory.utils.load_documents import LessonLoader
from class_factory.utils.response_parsers import ValidatorResponse
from class_factory.utils.slide_pipeline_utils import (
clean_latex_content, comment_out_includegraphics, validate_latex)
# %%
class BeamerBot(BaseModel):
    """
    Generate LaTeX Beamer slides for a single lesson using a language model (LLM).

    BeamerBot builds a chat prompt from the lesson objectives, the lesson readings and (when a
    slide directory is configured) the previous lesson's presentation, asks the LLM for LaTeX,
    has a ``Validator`` score the answer, and finally checks the LaTeX with ``validate_latex``.

    Attributes:
        lesson_no (int): Lesson number for which to generate slides.
        llm: Language model instance for generating slides.
        course_name (str): Name of the course for slide context.
        lesson_loader (LessonLoader): Loader for accessing lesson readings and objectives.
        output_dir (Path): Directory to save the generated Beamer slides.
        slide_dir (Optional[Path]): Directory containing existing Beamer slides (mirrors
            ``lesson_loader.slide_dir``).
        llm_response (str): The most recent full LaTeX document (preamble + LLM output).
        prompt (ChatPromptTemplate): Prompt template used for generation and validation.
        readings (str): Readings formatted for the prompt.
        user_objectives (dict): User-provided lesson objectives, if any.
        prior_lesson (int): Lesson number treated as "the last lesson" in the prompt.
        beamer_output (Path): Target ``.tex`` file written by ``save_slides``.

    Methods:
        generate_slides(specific_guidance: str = None, lesson_objectives: dict = None,
                        latex_compiler: str = "pdflatex") -> str:
            Generates Beamer slides as LaTeX code for the specified lesson.
        save_slides(latex_content: str) -> None:
            Saves the generated LaTeX content to a .tex file.
        set_user_objectives(objectives: Union[List[str], Dict[str, str]]):
            Initialize user-defined lesson objectives. Inherited from BaseModel.

    Internal Methods:
        _format_readings_for_prompt() -> str:
            Combines readings across lessons into a single string for the LLM prompt.
        _find_prior_lesson(lesson_no: int, max_attempts: int = 3) -> Path:
            Finds the most recent prior lesson's Beamer file to use as a template.
        _load_prior_lesson() -> str:
            Loads the LaTeX content of a prior lesson's Beamer presentation as a string.
        _generate_prompt(human_prompt: str = None) -> ChatPromptTemplate:
            Constructs the LLM prompt template.
        _validate_llm_response(generated_slides: str, objectives: str, readings: str,
                               last_presentation: str, prompt_specific_guidance: str = "",
                               additional_guidance: str = "") -> Dict[str, Any]:
            Validates the generated LaTeX for quality and accuracy.
    """

    def __init__(self, lesson_no: int, llm, course_name: str, lesson_loader: LessonLoader,
                 output_dir: Union[Path, str, None] = None, verbose: bool = False,
                 slide_dir: Union[Path, str, None] = None, lesson_objectives: dict = None):
        """
        Set up the prompt, LLM chain and validator for one lesson.

        Args:
            lesson_no (int): Lesson number to generate slides for.
            llm: Language model used for both generation and validation.
            course_name (str): Course name inserted into the system prompt.
            lesson_loader (LessonLoader): Source of readings, objectives and prior slides.
            output_dir (Path | str, optional): Passed through to ``BaseModel``.
            verbose (bool): Passed through to ``BaseModel``.
            slide_dir (Path | str, optional): Overrides ``lesson_loader.slide_dir`` when given.
            lesson_objectives (dict, optional): User-supplied objectives for this lesson.
        """
        super().__init__(lesson_no=lesson_no, course_name=course_name, lesson_loader=lesson_loader,
                         output_dir=output_dir, verbose=verbose)
        self.llm = llm
        self.llm_response = None

        # Determine slide directory: an explicit argument wins over the loader's own setting.
        if slide_dir:
            self.lesson_loader.slide_dir = slide_dir
        elif not self.lesson_loader.slide_dir:
            self.logger.warning(
                "No slide directory provided directly or through lesson loader. "
                "Some functionality, such as loading prior presentations, may be limited."
            )
        # Always mirror the loader's value so the attribute exists on every code path.
        self.slide_dir = self.lesson_loader.slide_dir

        self.readings = self._format_readings_for_prompt()  # Adjust reading formatting
        self.user_objectives = (
            self.set_user_objectives(lesson_objectives, range(self.lesson_no, self.lesson_no + 1))
            if lesson_objectives else {}
        )

        # Initialize chain and validator
        self.prompt = self._generate_prompt()
        parser = StrOutputParser()
        self.chain = self.prompt | self.llm | parser
        self.validator = Validator(llm=self.llm, parser=JsonOutputParser(pydantic_object=ValidatorResponse),
                                   log_level=self.logger.level)

        # Default prior lesson; updated by _find_prior_lesson when a prior Beamer file is located.
        self.prior_lesson = self.lesson_no - 1
        self.beamer_output = self.output_dir / f'L{self.lesson_no}.tex'

    def _format_readings_for_prompt(self) -> str:
        """
        Format readings as a single string for use in the LLM prompt.

        Returns:
            str: One ``"Lesson <n>: <reading>, <reading>, ..."`` paragraph per lesson returned by
            ``self._load_readings``, separated by blank lines.
        """
        all_readings_dict = self._load_readings(self.lesson_no)
        return "\n\n".join(
            f"Lesson {lesson}: {', '.join(readings)}"
            for lesson, readings in all_readings_dict.items()
        )

    def _find_prior_lesson(self, lesson_no: int, max_attempts: int = 3) -> Path:
        """
        Find the most recent prior lesson's Beamer file to use as a template.

        Side effect: on success ``self.prior_lesson`` is set to the lesson number that was found.

        Args:
            lesson_no (int): The current lesson number.
            max_attempts (int): Number of prior lessons to attempt to retrieve. Defaults to 3.

        Returns:
            Path: The path to the located Beamer file.

        Raises:
            FileNotFoundError: If no slide directory is configured, or no prior lesson file
                is found within ``max_attempts``.
        """
        slide_dir = self.lesson_loader.slide_dir
        if not slide_dir:
            raise FileNotFoundError("No slide directory is configured; cannot locate a prior Beamer file.")
        slide_dir = Path(slide_dir)  # tolerate a str-valued slide_dir

        for offset in range(1, max_attempts + 1):
            prior_lesson = lesson_no - offset
            beamer_file = slide_dir / f'L{prior_lesson}.tex'
            # Check if the Beamer file exists for this prior lesson
            if beamer_file.is_file():
                self.prior_lesson = int(prior_lesson)
                self.logger.info(f"Found prior lesson: Lesson {prior_lesson}")
                return beamer_file

        # Raise error if no valid prior Beamer file is found within the attempts
        raise FileNotFoundError(f"No prior Beamer file found within the last {max_attempts} lessons.")

    def _load_prior_lesson(self) -> str:
        """
        Load the previous lesson's Beamer presentation as a string.

        Returns:
            str: Whatever ``lesson_loader.load_beamer_presentation`` returns for the file located
            by ``lesson_loader.find_prior_beamer_presentation``.
        """
        beamer_example = self.lesson_loader.find_prior_beamer_presentation(self.lesson_no)
        return self.lesson_loader.load_beamer_presentation(beamer_example)

    def _generate_prompt(self, human_prompt: str = None) -> ChatPromptTemplate:
        """
        Build the chat prompt template that guides LaTeX Beamer slide creation.

        Args:
            human_prompt (str, optional): Replacement for the default human-message template.
                It is parsed as a template, so literal braces must be doubled.

        Returns:
            ChatPromptTemplate: System message (already filled with the course name) followed by
            the human-message template.
        """
        slide_system_prompt = """You are a LaTeX Beamer specialist and a political scientist with expertise in {course_name}.
Your task is to create content for a college-level lesson using the Beamer presentation format.
Focus on clarity, relevance, and adherence to LaTeX standards."""
        # NOTE: literal LaTeX braces are doubled ({{ }}) because this text is a format template.
        slide_human_prompt = """
## Create a LaTeX Beamer presentation following the below guidelines:
### Source Documents and Examples
1. **Lesson Objectives**:
- We are on lesson {lesson_no}.
- Ensure each slide works toward the following lesson objectives:
{objectives}
2. **Lesson Readings**:
- Use these readings to guide your slide content:
{information}
---
### General Format to Follow:
1. **Title Slide**:
- Copy the prior lesson's title slide, **include author and institution from the last presentation**.
2. **Where We Came From**
- The subject of last lesson
- The readings from last lesson (Lesson {prior_lesson}).
3. **Where We Are Going**
- The subject of the current lesson
- The readings for the current lesson (Lesson {lesson_no}).
4. **Lesson Objectives**:
- The action in each lesson objective should be bolded (e.g. '\\textbf{{Understand}} the role of government.')
5. **Discussion Question**:
- Add a thought-provoking question based on lesson material to initiate conversation.
6. **Lecture Slides**:
- Cover key points from the lesson objectives and readings.
- Ensure logical flow and alignment with the objectives.
7. **In-Class Exercise**:
- Add an interactive exercise to engage and re-energize students.
- This exercise should occur about halfway through the lecture slides, to get students re-engaged.
8. **Key Takeaways**:
- Conclude with three primary takeaways from the lesson. These should emphasize the most critical points.
---
### Specific guidance for this lesson:
{specific_guidance}
---
### Example of Expected Output:
% This is an example format only. Use the provided last lesson as your primary source.
% Replace the example \\author{{}} and \\institute{{}} below with the corresponding values from last lesson's presentation
\\title{{Lesson 5: Interest Groups}}
\\author{{}}
\\institute[]{{}}
\\date{{\\today}}
\\begin{{document}}
\\section{{Introduction}}
\\begin{{frame}}
\\titlepage
\\end{{frame}}
...
\\end{{document}}
{additional_guidance}
---
### Example of previous presentation:
- Use the presentation from last lesson as an example for formatting and structure:
{last_presentation}
---
### IMPORTANT:
- Use valid LaTeX syntax.
- The output should contain **only** LaTeX code, with no extra explanations.
- Start at the point in the preamble where we call \\title.
- Failure to follow the format and style of the last lesson's presentation may result in the output being rejected.
- Use the **same author and institute** as provided in the last lesson’s presentation. Do not invent new names or institutions. Copy these values exactly from the prior lesson.
- If unable to identify the author and institute from the last lesson, just leave them blank.
- Failure to follow these instructions will result in the output being rejected.
"""
        human_template = human_prompt if human_prompt else slide_human_prompt
        return ChatPromptTemplate.from_messages(
            [
                SystemMessage(content=slide_system_prompt.format(course_name=self.course_name)),
                HumanMessagePromptTemplate.from_template(human_template),
            ]
        )

    def generate_slides(self, specific_guidance: str = None, lesson_objectives: dict = None,
                        latex_compiler: str = "pdflatex") -> str:
        """
        Generate LaTeX Beamer slides for the lesson using the language model.

        Args:
            specific_guidance (str, optional): Custom instructions for slide content and structure.
            lesson_objectives (dict, optional): Override objectives with custom ones.
                Format: {lesson_number: "objective text"}. When omitted, objectives supplied to
                the constructor (or to an earlier call) are kept.
            latex_compiler (str, optional): LaTeX compiler to use for validation. Defaults to "pdflatex".

        Returns:
            str: Complete LaTeX content for the presentation, including preamble.

        Raises:
            ValueError: If validation fails after maximum retry attempts.
            FileNotFoundError: If required prior lesson files cannot be located.

        Note:
            Each attempt runs two checks, and a failure of either consumes one of the 3 attempts:
            1. Content quality validation through the LLM validator
            2. LaTeX syntax validation using the specified compiler
        """
        # Only replace stored objectives when new ones are supplied; otherwise keep what
        # __init__ (or a previous call) configured instead of silently discarding it.
        if lesson_objectives:
            self.user_objectives = self.set_user_objectives(
                lesson_objectives, range(self.lesson_no, self.lesson_no + 1)
            )

        # Load objectives (last, current, next), readings, and previous lesson slides
        objectives_text = "\n\n".join(
            self._get_lesson_objectives(lesson)
            for lesson in range(self.lesson_no - 1, self.lesson_no + 2)
        )
        combined_readings_text = self.readings

        if self.lesson_loader.slide_dir:
            prior_presentation = self._load_prior_lesson()
        else:
            prior_presentation = "Not Provided"
            self.logger.warning(
                "No slide_dir provided. Prior slides will not be referenced during generation. "
                "If this is unintentional, please check LessonLoader configuration for slide_dir."
            )
        self.logger.info(f"{self.prompt=}")

        guidance_text = specific_guidance if specific_guidance else "Not provided."
        additional_guidance = ""
        MAX_RETRIES = 3

        # Generate Beamer slides via the chain
        for attempt in range(1, MAX_RETRIES + 1):
            response = self.chain.invoke({
                "objectives": objectives_text,
                "information": combined_readings_text,
                # The prior presentation's LaTeX text, not the prior lesson *number*.
                "last_presentation": prior_presentation,
                "lesson_no": self.lesson_no,
                # Same value the validation prompt uses, so both prompts agree.
                "prior_lesson": self.prior_lesson,
                "specific_guidance": guidance_text,
                "additional_guidance": additional_guidance,
            })

            # Validate raw LLM response for quality
            val_response = self._validate_llm_response(generated_slides=response,
                                                       objectives=objectives_text,
                                                       readings=combined_readings_text,
                                                       last_presentation=prior_presentation,
                                                       prompt_specific_guidance=guidance_text)
            self.validator.logger.info(f"Validation output: {val_response}")
            # A response without a status is treated as a failed validation rather than a crash.
            if int(val_response.get("status", 0)) != 1:
                additional_guidance = val_response.get("additional_guidance", "")
                self.validator.logger.warning(
                    f"Response validation failed on attempt {attempt}. "
                    f"Guidance for improvement: {additional_guidance}"
                )
                continue  # Retry LLM generation

            # Clean and format the LaTeX output
            cleaned_latex = clean_latex_content(response)
            full_latex = preamble + "\n\n" + comment_out_includegraphics(cleaned_latex)
            self.llm_response = full_latex  # kept for inspection even if compilation fails

            # Validate the generated LaTeX code; a validator error counts as a failed attempt.
            is_valid_latex = False
            try:
                is_valid_latex = validate_latex(full_latex, latex_compiler=latex_compiler)
            except Exception as e:
                self.logger.error(f"LaTeX validation encountered an error: {e}")

            if is_valid_latex:
                return full_latex

            self.logger.warning("\nLaTeX code is invalid. Attempting a second model run. "
                                "If the error persists, please review the LLM output for potential causes. "
                                "You can inspect the model output via the 'llm_response' object (BeamerBot.llm_response). "
                                "\n\nNote: Compilation issues may stem from syntax errors in the example LaTeX code provided to the model."
                                )

        # Handle validation failure after max retries
        raise ValueError("Validation failed after max retries. Ensure correct prompt and input data. Consider trying a different LLM.")

    def _validate_llm_response(self, generated_slides: str, objectives: str, readings: str, last_presentation: str,
                               prompt_specific_guidance: str = "", additional_guidance: str = "") -> Dict[str, Any]:
        """
        Validate the generated LaTeX slides for content quality and formatting accuracy.

        The generation prompt is rendered with the same inputs and handed to the validator as the
        task description, so the slides are judged against the instructions the LLM received.

        Args:
            generated_slides (str): LaTeX content generated by the LLM.
            objectives (str): Formatted string of lesson objectives for validation.
            readings (str): Formatted string of lesson readings for content verification.
            last_presentation (str): Content from prior lesson's presentation for format consistency.
            prompt_specific_guidance (str, optional): Custom guidance provided during generation.
            additional_guidance (str, optional): Supplementary guidance for validation refinement.

        Returns:
            Dict[str, Any]: The validator's result. ``generate_slides`` reads ``status``
            (1 means pass) and ``additional_guidance`` from it.
        """
        # Validate slide quality and accuracy
        response_str = str(generated_slides)
        validation_prompt = self.prompt.format(
            objectives=objectives,
            information=readings,
            last_presentation=last_presentation,
            lesson_no=self.lesson_no,
            prior_lesson=self.prior_lesson,
            additional_guidance=additional_guidance,
            specific_guidance=prompt_specific_guidance
        )
        return self.validator.validate(
            task_description=validation_prompt,
            generated_response=response_str,
            min_eval_score=8,
            specific_guidance="Pay attention to the concepts introduced and their accuracy with respect to the texts."
        )

    def save_slides(self, latex_content: str) -> None:
        """
        Save the generated LaTeX content to ``self.beamer_output``.

        Args:
            latex_content (str): The LaTeX content to save.
        """
        # Make sure the target directory exists so the write cannot fail on a fresh output_dir.
        Path(self.beamer_output).parent.mkdir(parents=True, exist_ok=True)
        with open(self.beamer_output, 'w', encoding='utf-8') as f:
            f.write(latex_content)
        self.logger.info(f"Slides saved to {self.beamer_output}")
if __name__ == "__main__":
    # Manual smoke run: builds a BeamerBot from environment configuration and generates slides.
    import os

    from dotenv import load_dotenv
    # Ollama / ChatGoogleGenerativeAI are only used by the commented-out alternatives below.
    from langchain_community.llms import Ollama
    from langchain_google_genai import ChatGoogleGenerativeAI
    from langchain_openai import ChatOpenAI
    from pyprojroot.here import here

    from class_factory.utils.tools import reset_loggers

    wd = here()
    load_dotenv()
    user_home = Path.home()
    reset_loggers(log_level=logging.INFO)

    def _env_path(var_name: str) -> Path:
        """Return the path stored in env var ``var_name``, resolved relative to the home directory."""
        value = os.getenv(var_name)
        if not value:
            # Fail with a readable message instead of a TypeError from ``Path / None``.
            raise EnvironmentError(f"Environment variable '{var_name}' is not set.")
        return user_home / value

    OPENAI_KEY = os.getenv('openai_key')
    OPENAI_ORG = os.getenv('openai_org')
    GEMINI_KEY = os.getenv('gemini_api_key')

    # Paths for readings, slides, and syllabus
    reading_dir = _env_path('readingsDir')
    slide_dir = _env_path('slideDir')
    syllabus_path = _env_path('syllabus_path')

    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.4,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        api_key=OPENAI_KEY,
        organization=OPENAI_ORG,
    )
    # llm = ChatGoogleGenerativeAI(
    #     model="gemini-1.5-flash-8b",
    #     temperature=0.4,
    #     max_tokens=None,
    #     timeout=None,
    #     max_retries=2,
    #     api_key=GEMINI_KEY
    # )

    # NOTE(review): `lsn` is not used below; BeamerBot is built with lesson_no=2 — confirm intent.
    lsn = 3

    # llm = Ollama(
    #     model="llama3.1",
    #     temperature=0.2
    # )

    # NOTE(review): this guidance is defined but not passed to generate_slides() — confirm intent.
    specific_guidance = """
The lesson should be structured in a way that discusses big picture ideas about political parties and their influence.
The slides will, at a minimum, cover the following:
- The role of parties in government and society
- How parties have changed over time
- Relating parties to Tocqueville's notion of associations
- Have a slide for each of the 5 functions of parties: Recruit Candidates, Nominate Candidates, Get Out the Vote (GOTV), Facilitate Electoral Choice, Influence National Government
"""

    loader = LessonLoader(syllabus_path=syllabus_path,
                          reading_dir=reading_dir,
                          slide_dir=slide_dir)

    # Initialize the BeamerBot
    beamer_bot = BeamerBot(
        lesson_no=2,
        lesson_loader=loader,
        llm=llm,
        course_name="American Government",
        verbose=True
    )

    # Generate slides for the configured lesson.
    # NOTE(review): the objective is keyed "3" while lesson_no is 2 — verify which lesson is meant.
    slides = beamer_bot.generate_slides(lesson_objectives={"3": "do nothing today"})

    # Save the generated LaTeX slides
    # beamer_bot.save_slides(slides)
# %%