Files
scriba/internal/llm/structure_builder.go
2024-11-09 22:57:47 +01:00

200 lines
5.7 KiB
Go

/*
Copyright © 2024 Matteo Schiff <matteo@underdesk.net>
*/
package llm
import (
"context"
"encoding/json"
"fmt"
"os"
"sort"
"strconv"
"strings"
"github.com/sashabaranov/go-openai"
)
// StructureBuilder splits a transcription into titled sections by querying a
// chat completion model, and can reuse a structure previously saved on disk.
type StructureBuilder struct {
// client is the go-openai client used to send the chat completion requests.
client *openai.Client
// useCache makes Split reuse the saved structure when it can be loaded.
useCache bool
}
// NewStructureBuilder returns a StructureBuilder that talks to the model
// through client and has the on-disk cache enabled.
func NewStructureBuilder(client *openai.Client) *StructureBuilder {
	builder := new(StructureBuilder)
	builder.client = client
	builder.useCache = true
	return builder
}
// cacheFileName derives the cache file path for inputAudio by appending the
// ".splitted.txt" suffix to it.
func (wt *StructureBuilder) cacheFileName(inputAudio string) string {
	const cacheSuffix = ".splitted.txt"
	cachePath := inputAudio + cacheSuffix
	return cachePath
}
// loadCache reads the file called name and decodes it as a JSON object
// mapping keys to ParagraphItem values.
//
// It returns a non-nil error when the file cannot be read or does not contain
// valid JSON; the returned map is nil in that case. The read error is reported
// as such instead of being masked by the subsequent decoding error.
func (wt *StructureBuilder) loadCache(name string) (map[string]ParagraphItem, error) {
	contentJson, err := os.ReadFile(name)
	if err != nil {
		return nil, fmt.Errorf("reading structure cache %q: %w", name, err)
	}

	var content map[string]ParagraphItem
	if err := json.Unmarshal(contentJson, &content); err != nil {
		return nil, fmt.Errorf("decoding structure cache %q: %w", name, err)
	}
	return content, nil
}
// saveCache encodes content as JSON and writes it to the file called name,
// creating or truncating it with mode 0666 (before umask).
//
// It returns a non-nil error when encoding or writing fails.
func (wt *StructureBuilder) saveCache(name string, content map[string]ParagraphItem) error {
	contentJson, err := json.Marshal(content)
	if err != nil {
		return fmt.Errorf("encoding structure cache: %w", err)
	}
	if err := os.WriteFile(name, contentJson, 0666); err != nil {
		return fmt.Errorf("writing structure cache %q: %w", name, err)
	}
	return nil
}
// ParagraphItem describes one section of a transcription.
type ParagraphItem struct {
	// Title is the heading text and Type the heading level; the prompt sent to
	// the model asks for "heading1", "heading2" or "heading3". Both are encoded
	// in JSON under their Go field names.
	Title, Type string
	// Content is the text of the section. It is filled in by Split and is
	// excluded from JSON encoding and decoding.
	Content string `json:"-"`
}
// splitUsingLLM asks the chat model where new topics start in
// transcriptionSentences.
//
// The sentences are sent in consecutive blocks of blockSize numbered lines.
// Each request also carries the structure collected so far, and the JSON
// object returned by the model is merged into that structure.
//
// The result maps the line number (as a decimal string, as produced by the
// model) to the title and heading type of the section starting there. If a
// request fails, the error is printed and an empty map is returned. A block
// whose answer is missing or is not valid JSON is reported and skipped.
func (wt *StructureBuilder) splitUsingLLM(transcriptionSentences []string) map[string]ParagraphItem {
	// Number of transcription lines sent to the model per request.
	const blockSize = 200

	systemPrompt := openai.ChatCompletionMessage{
		Role: openai.ChatMessageRoleSystem,
		Content: `
You are given a transcription of a conversation or speech. Your task is to identify where new topics or arguments begin within the text. A new topic is marked by a shift in subject, a new argument being introduced, or a clear change in focus. Additionally, ensure that each identified segment (paragraph) contains more than just a few sentences.
For each identified new topic, write a brief title or summary describing it. Additionally, assign a heading type based on the importance or hierarchy of the topic. Use the following rules for heading types:
"heading1" for main sections or major topics.
"heading2" for subtopics or important subsections within a main topic.
"heading3" for smaller, more detailed sections within a subtopic.
Input: The transcription will be provided as a series of lines. Each line is numbered for easy reference. Your task is to read through the transcription and identify the points where a new topic or argument begins.
The previous output will be provided as input to guide you on where new sections begin and their existing structure. Use this to extend the structure as needed.
Output: The output should be formatted as a JSON object, where each key is the line number where a new topic starts, and the corresponding value is an object containing the title of that topic and the heading type. For example:
Example:
{
"0": {
"Title": "Example title",
"Type": "heading1"
},
"4": {
"Title": "Example subtitle",
"Type": "heading2"
},
"9": {
"Title": "Another example subtitle",
"Type": "heading2"
},
"14": {
"Title": "A third example subtitle",
"Type": "heading2"
}
}
Instruction: Please read the transcription carefully and list the line numbers where each new topic or argument starts, along with a brief title summarizing that topic. Use the previous output to guide your extensions, adding titles and types ("heading1", "heading2", "heading3") as necessary.`,
	}

	currentStructure := map[string]ParagraphItem{}
	for currentStart := 0; currentStart < len(transcriptionSentences); currentStart += blockSize {
		// blockEnd is an exclusive slice bound, so it is clamped to the length
		// itself; clamping to len-1 would drop the last sentence.
		blockEnd := currentStart + blockSize
		if blockEnd > len(transcriptionSentences) {
			blockEnd = len(transcriptionSentences)
		}

		// Number every line with its absolute index so that the keys returned
		// by the model refer to positions in the whole transcription.
		var userPromptText strings.Builder
		for i, sentence := range transcriptionSentences[currentStart:blockEnd] {
			fmt.Fprintf(&userPromptText, "%d: %s\n", i+currentStart, sentence)
		}

		currentJson, err := json.Marshal(currentStructure)
		if err != nil {
			fmt.Printf("Splitting error: %v\n", err)
			return map[string]ParagraphItem{}
		}

		currentPrompt := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: "Current structure is: " + string(currentJson),
		}
		userPrompt := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: "Current transcription slice: " + userPromptText.String(),
		}

		resp, err := wt.client.CreateChatCompletion(
			context.Background(),
			openai.ChatCompletionRequest{
				Model:    "mistral-small-instruct",
				Messages: []openai.ChatCompletionMessage{systemPrompt, currentPrompt, userPrompt},
				ResponseFormat: &openai.ChatCompletionResponseFormat{
					Type: openai.ChatCompletionResponseFormatTypeJSONObject,
				},
			},
		)
		if err != nil {
			fmt.Printf("Splitting error: %v\n", err)
			return map[string]ParagraphItem{}
		}

		// A response without choices would otherwise panic on Choices[0].
		if len(resp.Choices) == 0 {
			fmt.Printf("Splitting error: empty response for lines %d-%d\n", currentStart, blockEnd-1)
			continue
		}

		var data map[string]ParagraphItem
		if err := json.Unmarshal([]byte(resp.Choices[0].Message.Content), &data); err != nil {
			// Keep what was collected so far and move on to the next block.
			fmt.Printf("Splitting error: invalid JSON for lines %d-%d: %v\n", currentStart, blockEnd-1, err)
			continue
		}
		for k, v := range data {
			currentStructure[k] = v
		}
	}
	return currentStructure
}
// Split divides transcription into titled paragraphs.
//
// The transcription is cut into sentences at every "." and the section
// structure is taken from the cache file splitFile when caching is enabled and
// the file holds a non-empty structure. Otherwise the structure is requested
// from the model and, when non-empty, written to splitFile. A failure to write
// the cache is printed and does not fail the call.
//
// Each returned item carries the title and heading type of a section, and in
// Content the sentences from its start line up to (but excluding) the start
// line of the next section, each terminated by ".". Sentences preceding the
// first section start are not part of any item. Keys that are not numbers, or
// that point outside the transcription, are ignored. The returned error is
// always nil.
func (wt *StructureBuilder) Split(splitFile string, transcription string) ([]ParagraphItem, error) {
	transcriptionSentences := strings.Split(transcription, ".")

	var paragraphs map[string]ParagraphItem
	if wt.useCache {
		// An empty cache is treated as a miss: splitUsingLLM yields an empty
		// structure when it fails, and reusing that would hide the failure
		// on every later run.
		if cache, err := wt.loadCache(splitFile); err == nil && len(cache) > 0 {
			paragraphs = cache
		}
	}
	if paragraphs == nil {
		paragraphs = wt.splitUsingLLM(transcriptionSentences)
		if len(paragraphs) > 0 {
			if err := wt.saveCache(splitFile, paragraphs); err != nil {
				fmt.Printf("Saving structure cache error: %v\n", err)
			}
		}
	}

	// Index the sections by numeric start line. The keys come from the model
	// (or a file), so they are validated before being used as slice indices.
	byLine := make(map[int]ParagraphItem, len(paragraphs))
	for key, item := range paragraphs {
		line, err := strconv.Atoi(key)
		if err != nil || line < 0 || line >= len(transcriptionSentences) {
			continue
		}
		// When two keys denote the same line (e.g. "4" and "04"), the
		// canonical spelling wins regardless of map iteration order.
		if _, seen := byLine[line]; seen && key != strconv.Itoa(line) {
			continue
		}
		byLine[line] = item
	}

	startLines := make([]int, 0, len(byLine))
	for line := range byLine {
		startLines = append(startLines, line)
	}
	sort.Ints(startLines)

	var items []ParagraphItem
	for i, start := range startLines {
		// end is exclusive: the first sentence of the next section belongs to
		// that section only.
		end := len(transcriptionSentences)
		if i < len(startLines)-1 {
			end = startLines[i+1]
		}
		item := byLine[start]
		item.Content = strings.Join(transcriptionSentences[start:end], ".") + "."
		items = append(items, item)
	}
	return items, nil
}