"""
**BeamerBot Module**
--------------------
The `BeamerBot` module provides a framework for generating structured LaTeX Beamer slides based on lesson objectives, readings, and prior lesson presentations. Using a language model (LLM), `BeamerBot` automates slide creation, ensuring a consistent slide structure while allowing for custom guidance and validation.
Key Functionalities
~~~~~~~~~~~~~~~~~~~
1. **Automated Slide Generation**:
- `BeamerBot` generates a LaTeX Beamer presentation for each lesson, incorporating:
- A title page with consistent author and institution information
- "Where We Came From" and "Where We Are Going" slides
- Lesson objectives with highlighted action verbs (e.g., `\\textbf{Analyze} key events`)
- Discussion questions and in-class exercises
- Summary slides with key takeaways
2. **Previous Lesson Integration**:
- Retrieves and references prior lesson presentations to maintain consistent formatting and flow
- Preserves author and institution information across presentations
3. **Prompt Customization and Validation**:
- Supports custom prompts and specific guidance for tailored slide content
- Validates generated LaTeX for correct formatting and content quality
- Provides multiple retry attempts if validation fails
Dependencies
~~~~~~~~~~~~~
This module requires:
- `langchain_core`: For LLM chain creation and prompt handling
- `pathlib`: For file path management
- Custom utility modules for:
- Document loading (`load_documents`)
- LaTeX validation (`llm_validator`)
- Response parsing (`response_parsers`)
- Slide pipeline utilities (`slide_pipeline_utils`)
Usage
~~~~~~~
1. **Initialize BeamerBot**:
```python
beamer_bot = BeamerBot(
lesson_no=10,
llm=llm,
course_name="Political Science",
lesson_loader=lesson_loader,
output_dir=output_dir
)
```
2. **Generate Slides**:
```python
# Optional specific guidance
guidance = "Focus on comparing democratic and authoritarian systems"
slides = beamer_bot.generate_slides(specific_guidance=guidance)
```
3. **Save the Slides**:
```python
beamer_bot.save_slides(slides)
```
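4. **Inspect the Raw LLM Output (optional)**:
If validation or LaTeX compilation fails, the most recently generated LaTeX can be reviewed via the `llm_response` attribute:
```python
print(beamer_bot.llm_response)
```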
"""
# %%
import logging
from pathlib import Path
from typing import Any, Dict, Union
# langchain components for prompt and message handling
from langchain_core.messages import SystemMessage
from langchain_core.prompts import (ChatPromptTemplate,
HumanMessagePromptTemplate)
from class_factory.beamer_bot.beamer_prompts import (beamer_human_prompt,
beamer_system_prompt)
from class_factory.beamer_bot.beamer_slides import BeamerSlides, Slide
from class_factory.beamer_bot.slide_preamble import preamble
from class_factory.utils.base_model import BaseModel
from class_factory.utils.llm_validator import Validator
from class_factory.utils.load_documents import LessonLoader
from class_factory.utils.slide_pipeline_utils import (
comment_out_includegraphics, validate_latex)
class BeamerBot(BaseModel):
"""
A class to generate LaTeX Beamer slides for a specified lesson using a language model (LLM).
BeamerBot automates the slide generation process, creating structured presentations based on lesson
readings, objectives, and content from prior presentations when available. Each slide is crafted
following a consistent format, and the generated LaTeX is validated for correctness.
Attributes:
lesson_no (int): Lesson number for which to generate slides.
llm: Language model instance for generating slides.
course_name (str): Name of the course for slide context.
lesson_loader (LessonLoader): Loader for accessing lesson readings and objectives.
output_dir (Path): Directory to save the generated Beamer slides.
slide_dir (Optional[Path]): Directory containing existing Beamer slides.
llm_response (str): Stores the generated LaTeX response from the LLM.
prompt (str): Generated prompt for the LLM.
lesson_objectives (dict, optional): User-provided lesson objectives, used when a syllabus is not available.
Methods:
generate_slides(specific_guidance: str = None, lesson_objectives: dict = None, latex_compiler: str = "pdflatex") -> str:
Generates Beamer slides as LaTeX code for the specified lesson.
save_slides(latex_content: str) -> None:
Saves the generated LaTeX content to a .tex file.
set_user_objectives(objectives: Union[List[str], Dict[str, str]]):
Initialize user-defined lesson objectives, converting lists to dictionaries if needed. Inherited from BaseModel.
Internal Methods:
_format_readings_for_prompt() -> str:
Combines readings across lessons into a single string for the LLM prompt.
_find_prior_lesson(lesson_no: int, max_attempts: int = 3) -> Path:
Finds the most recent prior lesson's Beamer file to use as a template.
_load_prior_lesson() -> str:
Loads the LaTeX content of a prior lesson's Beamer presentation as a string.
_generate_prompt() -> ChatPromptTemplate:
Constructs the LLM prompt using lesson objectives, readings, and prior lesson content.
_validate_llm_response(generated_slides: str, objectives: str, readings: str, last_presentation: str,
prompt_specific_guidance: str = "", additional_guidance: str = "") -> Dict[str, Any]:
Validates the generated LaTeX for quality and accuracy.
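Example:
A minimal usage sketch; assumes `llm`, `lesson_loader`, and `output_dir` are already configured:

    beamer_bot = BeamerBot(
        lesson_no=10,
        llm=llm,
        course_name="Political Science",
        lesson_loader=lesson_loader,
        output_dir=output_dir
    )
    slides = beamer_bot.generate_slides()
    beamer_bot.save_slides(slides)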
"""
def __init__(self, lesson_no: int, llm, course_name: str, lesson_loader: LessonLoader,
output_dir: Union[Path, str] = None, verbose: bool = False,
slide_dir: Union[Path, str] = None, lesson_objectives: dict = None):
super().__init__(lesson_no=lesson_no, course_name=course_name, lesson_loader=lesson_loader,
output_dir=output_dir, verbose=verbose)
self.llm = llm
self.llm_response = None
# Determine slide directory
if slide_dir:
self.lesson_loader.slide_dir = slide_dir
self.slide_dir = self.lesson_loader.slide_dir
elif not slide_dir and not self.lesson_loader.slide_dir:
self.logger.warning(
"No slide directory provided directly or through lesson loader. "
"Some functionality, such as loading prior presentations, may be limited."
)
self.readings = self._format_readings_for_prompt() # Adjust reading formatting
self.user_objectives = self.set_user_objectives(lesson_objectives, range(self.lesson_no, self.lesson_no+1)) if lesson_objectives else {}
self.prompt = self._generate_prompt()
# Use LLM with structured output
self.chain = self.prompt | self.llm.with_structured_output(BeamerSlides)
self.validator = Validator(llm=self.llm, log_level=self.logger.level)
# Verify the Beamer file from the previous lesson
self.prior_lesson = self.lesson_no - 1 # default prior lesson, updated when find prior beamer presentation
self.output_dir.mkdir(parents=True, exist_ok=True)
self.beamer_output = self.output_dir / f'L{self.lesson_no}.tex'
def _format_readings_for_prompt(self) -> str:
"""
Format readings as a single string for use in the LLM prompt.
Returns:
str: Combined readings across all specified lessons for the LLM prompt.
"""
all_readings_dict = self._load_readings(self.lesson_no)
combined_readings = []
for lesson, readings in all_readings_dict.items():
for idx, reading in enumerate(readings, start=1):
combined_readings.append(
f"Lesson {lesson}, Reading {idx}:\n{reading}\n")
return "\n".join(combined_readings)
def _find_prior_lesson(self, lesson_no: int, max_attempts: int = 3) -> Path:
"""
Find the most recent prior lesson's Beamer file to use as a template.
Args:
lesson_no (int): The current lesson number.
max_attempts (int): Number of prior lessons to attempt to retrieve. Defaults to 3.
Returns:
Path: The path to the located Beamer file.
Raises:
FileNotFoundError: If no valid prior lesson file is found within max_attempts.
"""
for i in range(1, max_attempts + 1):
prior_lesson = lesson_no - i
beamer_file = self.lesson_loader.slide_dir / f'L{prior_lesson}.tex'
# Check if the Beamer file exists for this prior lesson
if beamer_file.is_file():
self.prior_lesson = int(prior_lesson)
self.logger.info(f"Found prior lesson: Lesson {prior_lesson}")
return beamer_file
# Raise error if no valid prior Beamer file is found within the attempts
raise FileNotFoundError(f"No prior Beamer file found within the last {max_attempts} lessons.")
def _load_prior_lesson(self) -> str:
"""
Load the previous lesson's Beamer presentation as a string.
"""
beamer_example = self.lesson_loader.find_prior_beamer_presentation(self.lesson_no)
return self.lesson_loader.load_beamer_presentation(beamer_example)
def _generate_prompt(self, human_prompt: str = None) -> ChatPromptTemplate:
"""
Generates the chat prompt template that guides LaTeX Beamer slide creation.
Args:
human_prompt (str, optional): Custom human prompt template to use in place of the default.
Returns:
ChatPromptTemplate: The constructed prompt template for the LLM.
"""
prompt = ChatPromptTemplate.from_messages(
[
SystemMessage(
content=(
beamer_system_prompt.format(
course_name=self.course_name)
)
),
HumanMessagePromptTemplate.from_template(
beamer_human_prompt if not human_prompt else human_prompt),
]
)
return prompt
def generate_slides(self, specific_guidance: str = None, lesson_objectives: dict = None,
latex_compiler: str = "pdflatex") -> str:
"""
Generate LaTeX Beamer slides for the lesson using the language model.
Args:
specific_guidance (str, optional): Custom instructions for slide content and structure
lesson_objectives (dict, optional): Override default objectives with custom ones
Format: {lesson_number: "objective text"}
latex_compiler (str, optional): LaTeX compiler to use for validation. Defaults to "pdflatex"
Returns:
str: Complete LaTeX content for the presentation, including preamble
Raises:
ValueError: If validation fails after maximum retry attempts
FileNotFoundError: If required prior lesson files cannot be located
Note:
The method includes multiple validation steps:
1. Content quality validation through LLM
2. LaTeX syntax validation using specified compiler
3. Up to 3 retry attempts if validation fails
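Example:
A sketch of a call that overrides the default objectives (keys are lesson numbers as strings, per the format above):

    slides = beamer_bot.generate_slides(
        specific_guidance="Focus on comparing democratic and authoritarian systems",
        lesson_objectives={"10": "Analyze key events"}
    )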
"""
# Load objectives (last, current, next), readings, and previous lesson slides
self.user_objectives = self.set_user_objectives(lesson_objectives, range(self.lesson_no, self.lesson_no+1)) if lesson_objectives else {}
objectives_text = "\n\n".join([self._get_lesson_objectives(lesson)
for lesson in range(self.lesson_no - 1, self.lesson_no + 2)])
combined_readings_text = self.readings
if self.lesson_loader.slide_dir:
prior_lesson_tex = self._load_prior_lesson()
else:
prior_lesson_tex = "Not Provided"
self.logger.warning(
"No slide_dir provided. Prior slides will not be referenced during generation. "
"If this is unintentional, please check LessonLoader configuration for slide_dir."
)
self.logger.info(f"{self.prompt=}")
# Generate Beamer slides via the chain
additional_guidance = ""
retries, MAX_RETRIES = 0, 3
valid = False
while not valid and retries < MAX_RETRIES:
# LLM returns a structured BeamerSlides object
slides_data = self.chain.invoke({
"objectives": objectives_text,
"information": combined_readings_text,
"last_presentation": prior_lesson_tex,
"lesson_no": self.lesson_no,
'specific_guidance': specific_guidance if specific_guidance else "Not provided.",
"additional_guidance": additional_guidance
})
# Render the structured slide output as a LaTeX body
latex_body = slides_data.to_latex()
full_latex = preamble + "\n\n" + comment_out_includegraphics(latex_body)
self.llm_response = full_latex
# Validate the structured output and LaTeX
val_response = self._validate_llm_response(
generated_slides=slides_data,
objectives=objectives_text,
readings=combined_readings_text,
last_presentation=prior_lesson_tex,
prompt_specific_guidance=specific_guidance if specific_guidance else "Not provided.",
task_schema=BeamerSlides.model_json_schema()
)
self.validator.validation_result = val_response
self.validator.logger.info(f"Validation output: {val_response}")
# Retry generation with additional guidance if the validator did not pass the response
if int(val_response.get('status', 0)) != 1:
retries += 1
additional_guidance = val_response.get("additional_guidance", "")
self.validator.logger.warning(
f"Response validation failed on attempt {retries}. "
f"Guidance for improvement: {additional_guidance}"
)
continue # Retry LLM generation
# Validate the generated LaTeX code
is_valid_latex = False # Reset each iteration
try:
is_valid_latex = validate_latex(full_latex, latex_compiler=latex_compiler)
except Exception as e:
self.logger.error(f"LaTeX validation encountered an error: {e}")
if is_valid_latex:
valid = True
return full_latex
else:
retries += 1 # Increment retries only if validation fails
self.logger.warning("\nLaTeX code is invalid. Attempting another model run. "
"If the error persists, please review the LLM output for potential causes. "
"You can inspect the model output via the 'llm_response' object (BeamerBot.llm_response). "
"\n\nNote: Compilation issues may stem from syntax errors in the example LaTeX code provided to the model."
)
# Handle validation failure after max retries
if not valid:
raise ValueError("Validation failed after max retries. Ensure correct prompt and input data. Consider trying a different LLM.")
def _validate_llm_response(self, generated_slides, objectives: str, readings: str, last_presentation: str,
prompt_specific_guidance: str = "", additional_guidance: str = "", task_schema=None) -> Dict[str, Any]:
"""
Validates the generated LaTeX slides for content quality and formatting accuracy.
Args:
generated_slides (BeamerSlides): Structured slide output generated by the LLM
objectives (str): Formatted string of lesson objectives for validation
readings (str): Formatted string of lesson readings for content verification
last_presentation (str): Content from the prior lesson's presentation for format consistency
prompt_specific_guidance (str, optional): Custom guidance provided during generation
additional_guidance (str, optional): Supplementary guidance for validation refinement
task_schema (dict, optional): JSON schema describing the expected structured slide output
Returns:
Dict[str, Any]: Validation results containing:
- status (int): 1 for pass, 0 for fail
- evaluation_score (float): Quality score (0-10)
- additional_guidance (str): Suggestions for improvement if validation fails
- rationale (str): Explanation of the validation result
Note:
The validation process uses the same prompt template as slide generation to ensure
consistency between requirements and validation criteria.
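Example:
An illustrative return value (contents will vary by run):

    {
        "status": 1,
        "evaluation_score": 8.5,
        "additional_guidance": "",
        "rationale": "Slides align with the stated objectives and follow the prior lesson's format."
    }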
"""
# Validate slide quality and accuracy
response_str = str(generated_slides)
validation_prompt = self.prompt.format(
objectives=objectives,
information=readings,
last_presentation=last_presentation,
lesson_no=self.lesson_no,
prior_lesson=self.prior_lesson,
additional_guidance=additional_guidance,
specific_guidance=prompt_specific_guidance
)
val_response = self.validator.validate(
task_description=validation_prompt,
generated_response=generated_slides,
task_schema=task_schema,
specific_guidance=prompt_specific_guidance + ("\n" + additional_guidance if additional_guidance else "")
)
return val_response
def save_slides(self, latex_content: str, output_dir: Union[Path, str] = None) -> None:
"""
Save the generated LaTeX content to a .tex file.
Args:
latex_content (str): The LaTeX content to save.
output_dir (Path or str, optional): Alternate directory for the .tex file. Defaults to the configured output directory.
"""
if output_dir:
    beamer_file = Path(output_dir) / f'L{self.lesson_no}.tex'
else:
    beamer_file = self.beamer_output
with open(beamer_file, 'w', encoding='utf-8') as f:
    f.write(latex_content)
self.logger.info(f"Slides saved to {beamer_file}")
# %%
if __name__ == "__main__":
import os
import yaml
from dotenv import load_dotenv
from langchain_community.llms import Ollama
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from pyprojroot.here import here
from class_factory.utils.tools import reset_loggers
wd = here()
load_dotenv()
user_home = Path.home()
reset_loggers(log_level=logging.INFO)
OPENAI_KEY = os.getenv('openai_key')
OPENAI_ORG = os.getenv('openai_org')
GEMINI_KEY = os.getenv('gemini_api_key')
# Path definitions
with open("class_config.yaml", "r") as file:
config = yaml.safe_load(file)
# class_config = config['PS491']
class_config = config['PS460']
slide_dir = user_home / class_config['slideDir']
syllabus_path = user_home / class_config['syllabus_path']
readingsDir = user_home / class_config['reading_dir']
is_tabular_syllabus = class_config['is_tabular_syllabus']
# llm = ChatOpenAI(
# model="gpt-4o-mini",
# temperature=0.4,
# max_tokens=None,
# timeout=None,
# max_retries=2,
# api_key=OPENAI_KEY,
# organization=OPENAI_ORG,
# )
llm = ChatGoogleGenerativeAI(
model="gemini-2.0-flash",
temperature=0.4,
max_tokens=None,
timeout=None,
max_retries=2,
api_key=GEMINI_KEY
)
lsn = 3
# llm = Ollama(
# model="llama3.1",
# temperature=0.2
# )
specific_guidance = """
The objectives slide should include an objective titled "have tons of fun"
"""
loader = LessonLoader(syllabus_path=syllabus_path,
reading_dir=readingsDir,
slide_dir=slide_dir,
tabular_syllabus=is_tabular_syllabus
)
# Initialize the BeamerBot
beamer_bot = BeamerBot(
lesson_no=11,
lesson_loader=loader,
llm=llm,
course_name="Civil-Military Relations",
verbose=True
)
# Generate slides for Lesson 11
slides = beamer_bot.generate_slides(specific_guidance=specific_guidance, lesson_objectives={"11": "Learn Feaver's principal-agent theory"})
print(slides)
# Save the generated LaTeX slides
# beamer_bot.save_slides(slides)
# %%