/* Copyright © 2024 Matteo Schiff */ package llm import ( "context" "encoding/json" "fmt" "os" "sort" "strconv" "strings" "github.com/sashabaranov/go-openai" ) type ParagraphDetector struct { client *openai.Client useCache bool } func NewParagraphDetector(client *openai.Client) *ParagraphDetector { return &ParagraphDetector{ client: client, useCache: true, } } func (wt *ParagraphDetector) cacheFileName(inputAudio string) string { return inputAudio + ".splitted.txt" } func (wt *ParagraphDetector) loadCache(name string) ([]int, error) { contentJson, err := os.ReadFile(wt.cacheFileName(name)) var content []int err = json.Unmarshal(contentJson, &content) return content, err } func (wt *ParagraphDetector) saveCache(name string, content []int) error { contentJson, _ := json.Marshal(content) err := os.WriteFile(wt.cacheFileName(name), contentJson, 0666) return err } type SplitResponse struct { Topics map[string]string `json:"Topics"` } func (s SplitResponse) MarshalJSON() ([]byte, error) { return json.Marshal(struct{ Topics map[string]string `json:"Topics"` }{ Topics: s.Topics, }) } func (wt *ParagraphDetector) splitUsingLLM(transcriptionSentences []string) []int { systemPrompt := openai.ChatCompletionMessage{ Role: openai.ChatMessageRoleSystem, //Content: "Identify topics in the following transcription. Topics are a group of sentences about the same argument. Format the output using a JSON integer array, where each item is the line number (integer). Each time you see one or two new topics, add the integer to the array.", Content: ` You are given a transcription of a conversation or speech. Your task is to identify where new topics or arguments begin within the text. A new topic is marked by a shift in subject, a new argument being introduced, or a clear change in focus. Additionally, ensure that each identified segment (paragraph) contains more than just a few sentences. If necessary, group smaller sections together so that each segment is substantial. Input: The transcription will be provided as a series of lines. Each line is numbered for easy reference. Your task is to read through the transcription and identify the points where a new topic or argument begins. If a segment is too short (fewer than three sentences), it should be grouped with an adjacent segment. For each identified new topic, write a brief title or summary describing it. Output: The output should be formatted as a JSON object, where each key is the line number where a new topic starts, and the corresponding value is the title of that topic. For example: json { "0": "Title of First Topic", "19": "Title of Second Topic" } Example: Transcription: vbnet 0: Hello, everyone. Today we're going to talk about climate change. 1: It's a complex issue, but I'll try to break it down. 2: Climate change refers to long-term shifts in temperatures and weather patterns. 3: In this discussion, we will cover the causes, effects, and possible solutions. 4: First, let's discuss the causes of climate change. 5: There are several factors, including greenhouse gas emissions. 6: Most emissions come from burning fossil fuels like coal, oil, and gas. 7: Another key cause is deforestation, which reduces the number of trees that can absorb CO2. 8: Deforestation not only affects CO2 levels but also disrupts ecosystems. 9: Next, let's move on to the effects of climate change. 10: Rising temperatures are one of the most obvious effects. 11: This leads to melting ice caps, rising sea levels, and extreme weather events. 12: We are already seeing more frequent and intense heatwaves, hurricanes, and floods. 13: The impact on wildlife is also severe, with many species facing habitat loss. 14: Finally, what can we do to address this issue? 15: One solution is to reduce our carbon footprint by using energy more efficiently. 16: Renewable energy sources, like wind and solar, play a big role here. 17: Governments and organizations worldwide are investing in clean energy technologies. 18: Individual actions, such as reducing waste and conserving water, also make a difference. 19: That's all for today's discussion on climate change. Thank you for listening. Expected Output: json { "0": "Introduction to Climate Change", "4": "Causes of Climate Change", "9": "Effects of Climate Change", "14": "Solutions to Climate Change" } Instruction: Please read the transcription carefully and list the line numbers where each new topic or argument starts, along with a brief title summarizing that topic. Ensure that shorter segments are grouped with neighboring content to form more substantial paragraphs before marking the start of a new topic.`, } currentStart := 0 blockSize := 200 var startLines []int for currentStart < len(transcriptionSentences) { if currentStart > len(transcriptionSentences) { break } blockEnd := currentStart+blockSize if blockEnd >= len(transcriptionSentences) { blockEnd = len(transcriptionSentences) - 1 } currentSlice := transcriptionSentences[currentStart : blockEnd] userPromptText := "" for i, sentence := range currentSlice { userPromptText += fmt.Sprintf("%d: %s\n", i, sentence) } userPrompt := openai.ChatCompletionMessage{ Role: openai.ChatMessageRoleUser, Content: userPromptText, } resp1, err := wt.client.CreateChatCompletion( context.Background(), openai.ChatCompletionRequest{ Model: "mistral-7b-instruct-v0.3", Messages: []openai.ChatCompletionMessage{systemPrompt, userPrompt}, ResponseFormat: &openai.ChatCompletionResponseFormat{ Type: openai.ChatCompletionResponseFormatTypeJSONObject, JSONSchema: &openai.ChatCompletionResponseFormatJSONSchema{ Name: "splitter", Strict: true, Schema: SplitResponse{}, }, }, }, ) if err != nil { fmt.Printf("Splitting error: %v\n", err) return []int{} } fmt.Printf(resp1.Choices[0].Message.Content) var data map[string]string json.Unmarshal([]byte(resp1.Choices[0].Message.Content), &data) //startLinesRaw = append(startLinesRaw, strings.Split(resp1.Choices[0].Message.Content, ",")...) for k, _ := range data { before, _, _ := strings.Cut(k, "-") q, err := strconv.Atoi(before) if err != nil { continue } startLines = append(startLines, currentStart+q) } fmt.Println(startLines) currentStart += blockSize } /*fmt.Println(startLinesRaw) var err error for i, n := range startLinesRaw { startLines[i], err = strconv.Atoi(n) if err != nil { return nil } }*/ sort.Ints(startLines) return startLines } func (wt *ParagraphDetector) Split(name string, transcription string) ([]string, error) { transcriptionSentences := strings.Split(transcription, ".") cache, err := wt.loadCache(name) var startLines []int if wt.useCache && err == nil { startLines = cache } else { startLines = wt.splitUsingLLM(transcriptionSentences) wt.saveCache(name, startLines) } var splittedTranscription []string for i := range startLines { q := len(transcriptionSentences) - 1 if i < len(startLines) - 1 { q = startLines[i+1] } var currentParagraph string for j := startLines[i]; j <= q; j++ { currentParagraph += transcriptionSentences[j] + "." } splittedTranscription = append(splittedTranscription, currentParagraph) } return splittedTranscription, nil }