200 lines
5.7 KiB
Go
200 lines
5.7 KiB
Go
/*
Copyright © 2024 Matteo Schiff <matteo@underdesk.net>

*/
|
|
|
|
package llm
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/sashabaranov/go-openai"
|
|
)
|
|
|
|
type StructureBuilder struct {
|
|
client *openai.Client
|
|
useCache bool
|
|
}
|
|
|
|
func NewStructureBuilder(client *openai.Client) *StructureBuilder {
|
|
return &StructureBuilder{
|
|
client: client,
|
|
useCache: true,
|
|
}
|
|
}
|
|
|
|
func (wt *StructureBuilder) cacheFileName(inputAudio string) string {
|
|
return inputAudio + ".splitted.txt"
|
|
}
|
|
|
|
func (wt *StructureBuilder) loadCache(name string) (map[string]ParagraphItem, error) {
|
|
contentJson, err := os.ReadFile(name)
|
|
|
|
var content map[string]ParagraphItem
|
|
err = json.Unmarshal(contentJson, &content)
|
|
|
|
return content, err
|
|
}
|
|
|
|
func (wt *StructureBuilder) saveCache(name string, content map[string]ParagraphItem) error {
|
|
contentJson, _ := json.Marshal(content)
|
|
|
|
err := os.WriteFile(name, contentJson, 0666)
|
|
|
|
return err
|
|
}
|
|
|
|
// ParagraphItem describes one section of a transcription.
type ParagraphItem struct {
	// Title is the short title or summary of the section.
	Title string

	// Type is the heading level of the section; the model is asked to use
	// "heading1", "heading2" or "heading3".
	Type string

	// Content is the text of the section. It is excluded from JSON, so it
	// is neither read from the model output nor stored in the cache file.
	Content string `json:"-"`
}
|
|
|
|
func (wt *StructureBuilder) splitUsingLLM(transcriptionSentences []string) map[string]ParagraphItem {
|
|
systemPrompt := openai.ChatCompletionMessage{
|
|
Role: openai.ChatMessageRoleSystem,
|
|
Content: `
|
|
You are given a transcription of a conversation or speech. Your task is to identify where new topics or arguments begin within the text. A new topic is marked by a shift in subject, a new argument being introduced, or a clear change in focus. Additionally, ensure that each identified segment (paragraph) contains more than just a few sentences.
|
|
For each identified new topic, write a brief title or summary describing it. Additionally, assign a heading type based on the importance or hierarchy of the topic. Use the following rules for heading types:
|
|
|
|
"heading1" for main sections or major topics.
|
|
"heading2" for subtopics or important subsections within a main topic.
|
|
"heading3" for smaller, more detailed sections within a subtopic.
|
|
|
|
Input: The transcription will be provided as a series of lines. Each line is numbered for easy reference. Your task is to read through the transcription and identify the points where a new topic or argument begins.
|
|
|
|
The previous output will be provided as input to guide you on where new sections begin and their existing structure. Use this to extend the structure as needed.
|
|
Output: The output should be formatted as a JSON object, where each key is the line number where a new topic starts, and the corresponding value is an object containing the title of that topic and the heading type. For example:
|
|
Example:
|
|
{
|
|
"0": {
|
|
"Title": "Example title",
|
|
"Type": "heading1"
|
|
},
|
|
"4": {
|
|
"Title": "Example subtitle",
|
|
"Type": "heading2"
|
|
},
|
|
"9": {
|
|
"Title": "Another example subtitle",
|
|
"Type": "heading2"
|
|
},
|
|
"14": {
|
|
"Title": "A third example subtitle",
|
|
"Type": "heading2"
|
|
}
|
|
}
|
|
|
|
Instruction: Please read the transcription carefully and list the line numbers where each new topic or argument starts, along with a brief title summarizing that topic. Use the previous output to guide your extensions, adding titles and types ("heading1", "heading2", "heading3") as necessary.`,
|
|
}
|
|
|
|
currentStart := 0
|
|
blockSize := 200
|
|
|
|
currentStructure := map[string]ParagraphItem{}
|
|
|
|
for currentStart < len(transcriptionSentences) {
|
|
if currentStart > len(transcriptionSentences) {
|
|
break
|
|
}
|
|
|
|
blockEnd := currentStart + blockSize
|
|
|
|
if blockEnd >= len(transcriptionSentences) {
|
|
blockEnd = len(transcriptionSentences) - 1
|
|
}
|
|
|
|
currentSlice := transcriptionSentences[currentStart:blockEnd]
|
|
|
|
userPromptText := ""
|
|
for i, sentence := range currentSlice {
|
|
userPromptText += fmt.Sprintf("%d: %s\n", i+currentStart, sentence)
|
|
}
|
|
|
|
currentJson, err := json.Marshal(currentStructure)
|
|
currentPrompt := openai.ChatCompletionMessage{
|
|
Role: openai.ChatMessageRoleUser,
|
|
Content: "Current structure is: " + string(currentJson),
|
|
}
|
|
|
|
userPrompt := openai.ChatCompletionMessage{
|
|
Role: openai.ChatMessageRoleUser,
|
|
Content: "Current transcription slice: " + userPromptText,
|
|
}
|
|
|
|
resp1, err := wt.client.CreateChatCompletion(
|
|
context.Background(),
|
|
openai.ChatCompletionRequest{
|
|
Model: "mistral-small-instruct",
|
|
Messages: []openai.ChatCompletionMessage{systemPrompt, currentPrompt, userPrompt},
|
|
ResponseFormat: &openai.ChatCompletionResponseFormat{
|
|
Type: openai.ChatCompletionResponseFormatTypeJSONObject,
|
|
},
|
|
},
|
|
)
|
|
|
|
if err != nil {
|
|
fmt.Printf("Splitting error: %v\n", err)
|
|
return map[string]ParagraphItem{}
|
|
}
|
|
|
|
var data map[string]ParagraphItem
|
|
json.Unmarshal([]byte(resp1.Choices[0].Message.Content), &data)
|
|
|
|
for k, v := range data {
|
|
currentStructure[k] = v
|
|
}
|
|
|
|
currentStart += blockSize
|
|
}
|
|
return currentStructure
|
|
}
|
|
|
|
func (wt *StructureBuilder) Split(splitFile string, transcription string) ([]ParagraphItem, error) {
|
|
transcriptionSentences := strings.Split(transcription, ".")
|
|
|
|
cache, err := wt.loadCache(splitFile)
|
|
|
|
var paragraphs map[string]ParagraphItem
|
|
if wt.useCache && err == nil {
|
|
paragraphs = cache
|
|
} else {
|
|
paragraphs = wt.splitUsingLLM(transcriptionSentences)
|
|
wt.saveCache(splitFile, paragraphs)
|
|
}
|
|
|
|
var startLines []int
|
|
for k, _ := range paragraphs {
|
|
q, err := strconv.Atoi(k)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
startLines = append(startLines, q)
|
|
}
|
|
sort.Ints(startLines)
|
|
|
|
var items []ParagraphItem
|
|
for i := range startLines {
|
|
q := len(transcriptionSentences) - 1
|
|
|
|
if i < len(startLines)-1 {
|
|
q = startLines[i+1]
|
|
}
|
|
|
|
var currentParagraph string
|
|
for j := startLines[i]; j <= q; j++ {
|
|
currentParagraph += transcriptionSentences[j] + "."
|
|
}
|
|
newItem := paragraphs[strconv.Itoa(startLines[i])]
|
|
newItem.Content = currentParagraph
|
|
items = append(items, newItem)
|
|
}
|
|
return items, nil
|
|
}
|