Audio to funscript

A Python script that converts audio files into funscript files.

import tkinter as tk
from tkinter import filedialog, messagebox
import json
import os
from pydub import AudioSegment
from pydub.utils import make_chunks

# Tooltip class for displaying tips
class CreateToolTip(object):
    """Delayed hover tooltip attached to a Tk widget.

    Entering the widget schedules the tip to appear after ``waittime``
    milliseconds; leaving the widget or pressing a mouse button cancels any
    pending tip and removes a visible one.
    """

    def __init__(self, widget, text='widget info'):
        """Store the tip settings and bind the pointer events on *widget*."""
        self.widget = widget
        self.text = text
        self.waittime = 500  # milliseconds
        self.wraplength = 300  # pixels
        self.id = None  # identifier returned by widget.after() while a tip is pending
        self.tw = None  # Toplevel holding the tip while it is displayed
        for sequence, handler in (
            ("<Enter>", self.enter),
            ("<Leave>", self.leave),
            ("<ButtonPress>", self.leave),
        ):
            widget.bind(sequence, handler)

    def enter(self, event=None):
        """Start the countdown that leads to the tip being shown."""
        self.schedule()

    def leave(self, event=None):
        """Cancel a pending tip and remove a displayed one."""
        self.unschedule()
        self.hidetip()

    def schedule(self):
        """Restart the display timer, replacing any timer already pending."""
        self.unschedule()
        self.id = self.widget.after(self.waittime, self.showtip)

    def unschedule(self):
        """Cancel the pending display timer, if there is one."""
        pending, self.id = self.id, None
        if pending:
            self.widget.after_cancel(pending)

    def showtip(self, event=None):
        """Create the borderless tip window next to the widget."""
        left, top, _cx, _cy = self.widget.bbox("insert")
        # Shift from the widget's screen origin so the tip sits below-right.
        left += self.widget.winfo_rootx() + 25
        top += self.widget.winfo_rooty() + 20
        popup = tk.Toplevel(self.widget)
        self.tw = popup
        popup.wm_overrideredirect(True)  # no title bar or border
        popup.wm_geometry(f"+{left}+{top}")
        tk.Label(
            popup,
            text=self.text,
            justify='left',
            background="#ffffe0",
            relief='solid',
            borderwidth=1,
            wraplength=self.wraplength,
        ).pack(ipadx=1)

    def hidetip(self):
        """Destroy the tip window if one is currently displayed."""
        popup, self.tw = self.tw, None
        if popup:
            popup.destroy()

# Function to analyze audio and create funscript based on volume
def audio_to_funscript(audio_file, output_file, chunk_length_ms, silence_threshold, max_volume_threshold):
    """Convert an audio file into a funscript driven by per-chunk loudness.

    The audio is loaded, mixed down to one channel and split into chunks of
    ``chunk_length_ms``. Each chunk's loudness (dBFS) is mapped linearly so
    that ``silence_threshold`` becomes position 0 and ``max_volume_threshold``
    becomes position 100; results are clamped to 0-100 and truncated to an
    integer. One action is emitted per chunk, timestamped at the chunk start.

    Args:
        audio_file: Path of the audio file to read.
        output_file: Path of the funscript (JSON) file to write.
        chunk_length_ms: Chunk duration in milliseconds.
        silence_threshold: Loudness in dBFS mapped to position 0.
        max_volume_threshold: Loudness in dBFS mapped to position 100.

    Raises:
        ZeroDivisionError: If both thresholds are equal and the audio has at
            least one non-silent chunk.
    """
    # Load audio and convert to mono for easier analysis
    audio = AudioSegment.from_file(audio_file).set_channels(1)

    # The dB span between "silent" and "maximum" is the same for every chunk.
    threshold_span = max_volume_threshold - silence_threshold

    actions = []
    for index, chunk in enumerate(make_chunks(audio, chunk_length_ms)):
        loudness = chunk.dBFS

        if loudness == float('-inf'):
            # A completely silent chunk reports -inf, which int() cannot
            # convert; treat it as the lowest position.
            position = 0
        else:
            scaled = (loudness - silence_threshold) / threshold_span * 100
            # Clamp first, then truncate, so out-of-range values stay finite.
            position = int(max(min(scaled, 100), 0))

        actions.append({
            "pos": position,
            "at": index * chunk_length_ms
        })

    # Create funscript structure
    funscript_data = {
        "version": "1.0",
        "inverted": False,
        "range": 90,
        "info": "Generated from audio volume",
        "actions": actions
    }

    # Write to .funscript file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(funscript_data, f, indent=4)

# Function to select audio file and generate funscript
def select_file():
    """Validate the GUI parameters, ask for an audio file and convert it.

    Reads the chunk length and the two dBFS thresholds from the entry
    widgets, rejects invalid values with an error dialog, then opens a file
    chooser. The funscript is written next to the chosen audio file with the
    extension replaced by ``.funscript``. Any failure during conversion is
    reported in an error dialog instead of escaping the Tk callback.
    """
    # Only the parsing can raise ValueError, so keep the try block minimal.
    try:
        chunk_length_ms = int(chunk_length_entry.get())
        silence_threshold = float(silence_threshold_entry.get())
        max_volume_threshold = float(max_volume_threshold_entry.get())
    except ValueError:
        messagebox.showerror("Invalid Input", "Please enter valid numeric values.")
        return

    # Validate chunk length
    if not 20 <= chunk_length_ms <= 1000:
        messagebox.showerror("Invalid Input", "Chunk length must be between 20 ms and 1000 ms.")
        return

    # Validate volume thresholds
    if silence_threshold >= max_volume_threshold:
        messagebox.showerror("Invalid Input", "Silence threshold must be less than Max volume threshold.")
        return

    # Open file selection dialog
    audio_file = filedialog.askopenfilename(
        filetypes=[("Audio files", "*.mp3 *.wav")],
        title="Select Audio File"
    )
    if not audio_file:
        return  # dialog was cancelled

    # Generate output file name with .funscript extension
    output_file = os.path.splitext(audio_file)[0] + '.funscript'

    # This callback is the top-level boundary of the GUI: surface any
    # decoding or write failure to the user rather than losing it.
    try:
        audio_to_funscript(audio_file, output_file, chunk_length_ms, silence_threshold, max_volume_threshold)
    except Exception as exc:
        messagebox.showerror("Error", f"Audio processing failed: {exc}")
        return

    # Notify user
    messagebox.showinfo("Done", f"Funscript created at:\n{output_file}")

# GUI Setup
root = tk.Tk()
root.title("Audio to Funscript Converter")

# Instruction label
label = tk.Label(root, text="Select an audio file (MP3 or WAV) to convert to Funscript")
label.pack(pady=10)

# Frame for parameter inputs
params_frame = tk.Frame(root)
params_frame.pack(pady=10)


def _add_parameter_row(row, caption, default, tip):
    """Build one caption/entry pair with a tooltip in ``params_frame``.

    Returns the ``(label, entry)`` widgets; the entry is pre-filled with
    *default*.
    """
    caption_label = tk.Label(params_frame, text=caption)
    caption_label.grid(row=row, column=0, sticky="e", padx=5, pady=5)
    entry = tk.Entry(params_frame)
    entry.insert(0, default)
    entry.grid(row=row, column=1, padx=5, pady=5)
    CreateToolTip(entry, tip)
    return caption_label, entry


# Chunk length input
chunk_length_label, chunk_length_entry = _add_parameter_row(
    0,
    "Chunk Length (20-1000 ms):",
    "50",
    "Shorter chunks (20-50 ms) capture more detail.\nLonger chunks (200-1000 ms) create smoother actions.",
)

# Silence threshold input
silence_threshold_label, silence_threshold_entry = _add_parameter_row(
    1,
    "Silence Threshold (dBFS):",
    "-50",
    "Audio below this volume is considered silent.\nRaise this value (-40 to -30 dBFS) for quieter tracks.",
)

# Max volume threshold input
max_volume_threshold_label, max_volume_threshold_entry = _add_parameter_row(
    2,
    "Max Volume Threshold (dBFS):",
    "0",
    "Audio above this level is considered maximum.\nLower this value (-5 to -3 dBFS) to capture more peaks.",
)

# Button that starts the validate / choose-file / convert flow
select_button = tk.Button(root, text="Select Audio File", command=select_file)
select_button.pack(pady=20)

# Hand control to the Tk event loop
root.mainloop()

2 Likes
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import json
import os
from pydub import AudioSegment
from pydub.utils import make_chunks
import statistics

class ToolTip:
    """Hover tooltip shown just below a Tk widget while the pointer is over it."""

    def __init__(self, widget, text):
        """Remember the tip text and bind the enter/leave events on *widget*."""
        self.widget = widget
        self.text = text
        self.tip_window = None  # Toplevel holding the tip while it is visible
        widget.bind("<Enter>", self.show_tip)
        widget.bind("<Leave>", self.hide_tip)

    def show_tip(self, event=None):
        """Display the tip unless it is already visible or the text is empty."""
        if not self.text or self.tip_window:
            return
        anchor = self.widget
        # Place the tip slightly right of the widget's left edge, 5 px below it.
        left = anchor.winfo_rootx() + 20
        top = anchor.winfo_rooty() + anchor.winfo_height() + 5
        popup = tk.Toplevel(anchor)
        self.tip_window = popup
        popup.wm_overrideredirect(True)  # no title bar or border
        popup.wm_geometry(f"+{left}+{top}")
        tk.Label(
            popup,
            text=self.text,
            justify='left',
            background="#ffffe0",
            relief='solid',
            borderwidth=1,
            font=("Arial", 10),
        ).pack()

    def hide_tip(self, event=None):
        """Destroy the tip window if one is currently displayed."""
        popup = self.tip_window
        if not popup:
            return
        popup.destroy()
        self.tip_window = None

# Analyze audio and recommend parameters
def analyze_audio(audio_file):
    """Analyze an audio file and recommend conversion parameters.

    The audio is mixed down to one channel and measured in 100 ms chunks;
    chunks whose loudness is ``-inf`` (silent) are ignored. From the
    remaining loudness values the function derives:

    * a silence threshold: the lower of ``min + 10`` and ``mean - 5`` dB,
      normally floored at -60 dBFS;
    * a max volume threshold: ``max - 3`` dB, capped at 0 dBFS;
    * a chunk length chosen from the duration and the loudness variance.

    Args:
        audio_file: Path of the audio file to analyze.

    Returns:
        A dict with keys ``audio_file``, ``chunk_length``,
        ``silence_threshold`` and ``max_volume_threshold`` (thresholds
        rounded to one decimal), or ``None`` if analysis failed. Failures
        are reported to the user in an error dialog.
    """
    try:
        audio = AudioSegment.from_file(audio_file)
        audio = audio.set_channels(1)

        duration_ms = len(audio)
        loudness_values = [chunk.dBFS for chunk in make_chunks(audio, 100)]

        valid_loudness = [x for x in loudness_values if x != float('-inf')]
        if not valid_loudness:
            raise ValueError("Audio file is silent, cannot analyze")

        avg_loudness = statistics.mean(valid_loudness)
        min_loudness = min(valid_loudness)
        max_loudness = max(valid_loudness)

        unfloored_silence = min(min_loudness + 10, avg_loudness - 5)
        silence_threshold = max(unfloored_silence, -60)

        max_volume_threshold = max_loudness - 3
        max_volume_threshold = min(max_volume_threshold, 0)

        # For very quiet audio the -60 dBFS floor can push the silence
        # threshold up to or past the max threshold, which the conversion
        # step rejects. The unfloored value is at most ``max_loudness - 5``,
        # i.e. below ``max_loudness - 3``, so fall back to it in that case.
        if silence_threshold >= max_volume_threshold:
            silence_threshold = unfloored_silence

        loudness_variance = statistics.variance(valid_loudness) if len(valid_loudness) > 1 else 0
        # Shorter chunks for short or highly dynamic audio, longer otherwise.
        if duration_ms < 60000:
            chunk_length = 20 if loudness_variance > 100 else 50
        elif duration_ms < 300000:
            chunk_length = 50 if loudness_variance > 100 else 100
        else:
            chunk_length = 100 if loudness_variance > 100 else 200

        return {
            "audio_file": audio_file,
            "chunk_length": chunk_length,
            "silence_threshold": round(silence_threshold, 1),
            "max_volume_threshold": round(max_volume_threshold, 1)
        }
    except Exception as e:
        # GUI boundary: show the failure instead of letting it propagate.
        messagebox.showerror("Error", f"Audio analysis failed: {str(e)}")
        return None

# Process audio and generate Funscript
def audio_to_funscript(audio_file, output_file, chunk_length_ms, silence_threshold, max_volume_threshold, progress_callback=None):
    """Convert an audio file into a funscript driven by per-chunk loudness.

    The audio is mixed down to one channel and split into chunks of
    ``chunk_length_ms``. Each chunk's loudness (dBFS) is mapped linearly so
    that ``silence_threshold`` becomes position 0 and ``max_volume_threshold``
    becomes position 100, clamped to 0-100 and truncated to an integer.
    Silent chunks (loudness ``-inf``) map to position 0.

    Args:
        audio_file: Path of the audio file to read.
        output_file: Path of the funscript (JSON) file to write.
        chunk_length_ms: Chunk duration in milliseconds.
        silence_threshold: Loudness in dBFS mapped to position 0.
        max_volume_threshold: Loudness in dBFS mapped to position 100.
        progress_callback: Optional callable invoked after each chunk with
            the percentage of chunks processed so far (reaches 100 on the
            last chunk).

    Returns:
        ``True`` if the funscript was written, ``False`` if any error
        occurred (the error is shown to the user in a dialog).
    """
    try:
        audio = AudioSegment.from_file(audio_file)
        audio = audio.set_channels(1)
        chunks = make_chunks(audio, chunk_length_ms)

        actions = []
        total_chunks = len(chunks)
        # The dB span between "silent" and "maximum" is the same for every chunk.
        threshold_span = max_volume_threshold - silence_threshold

        for i, chunk in enumerate(chunks):
            loudness = chunk.dBFS
            if loudness == float('-inf'):
                normalized = 0
            else:
                normalized = (loudness - silence_threshold) / threshold_span * 100
                normalized = int(max(min(normalized, 100), 0))

            actions.append({"pos": normalized, "at": i * chunk_length_ms})

            if progress_callback:
                # Count the chunk just finished so the final report is 100%.
                progress_callback((i + 1) / total_chunks * 100)

        funscript_data = {
            "version": "1.0",
            "inverted": False,
            "range": 90,
            "info": "Generated from audio volume",
            "actions": actions
        }

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(funscript_data, f, indent=4)
        return True
    except Exception as e:
        # GUI boundary: show the failure instead of letting it propagate.
        messagebox.showerror("Error", f"Audio processing failed: {str(e)}")
        return False

# Analyze audio and update parameters
def analyze_and_update():
    """Ask for an audio file, analyze it and load the recommendations.

    On success the recommended values replace the contents of the three
    parameter entries, the analysis result is stored in
    ``current_audio_params``, the convert button is enabled and an info
    dialog is shown. Nothing happens if the dialog is cancelled or the
    analysis returns no result.
    """
    global current_audio_params

    audio_file = filedialog.askopenfilename(
        filetypes=[("Audio Files", "*.mp3 *.wav")],
        title="Select Audio File to Analyze"
    )
    if not audio_file:
        return

    params = analyze_audio(audio_file)
    if not params:
        return

    current_audio_params = params

    # Overwrite each entry with the matching recommended value.
    for entry, key in (
        (chunk_entry, "chunk_length"),
        (silence_entry, "silence_threshold"),
        (max_entry, "max_volume_threshold"),
    ):
        entry.delete(0, tk.END)
        entry.insert(0, str(params[key]))

    convert_button.config(state="normal")
    messagebox.showinfo(
        "Analysis Complete",
        f"Analyzed audio: {os.path.basename(audio_file)}\nRecommended parameters updated, confirm or adjust then click 'Confirm'",
    )

# Confirm and convert
def confirm_and_convert():
    """Validate the entered parameters and generate the funscript.

    Requires a previously analyzed audio file in ``current_audio_params``.
    The parameters are read from the entry widgets and checked; while the
    conversion runs a modal progress window is shown. On success the user is
    notified, the convert button is disabled and the stored analysis is
    cleared.
    """
    global current_audio_params

    if not current_audio_params or "audio_file" not in current_audio_params:
        messagebox.showerror("Error", "Please analyze an audio file first")
        return

    try:
        chunk_length = int(chunk_entry.get())
        silence_thresh = float(silence_entry.get())
        max_thresh = float(max_entry.get())

        # Pick the first validation failure, if any.
        if not 20 <= chunk_length <= 1000:
            problem = "Chunk length must be between 20 and 1000 ms"
        elif silence_thresh >= max_thresh:
            problem = "Silence threshold must be less than max volume threshold"
        elif max_thresh > 0 or silence_thresh > 0:
            problem = "dBFS values must be negative (recommended -60 to 0)"
        else:
            problem = None
        if problem is not None:
            messagebox.showerror("Error", problem)
            return

        audio_file = current_audio_params["audio_file"]
        output_file = os.path.splitext(audio_file)[0] + '.funscript'

        # Modal window that holds the progress bar during conversion.
        progress_window = tk.Toplevel(root)
        progress_window.title("Processing")
        progress_window.geometry("300x100")
        progress_window.transient(root)
        progress_window.grab_set()

        tk.Label(progress_window, text="Generating Funscript...").pack(pady=10)
        progress_bar = ttk.Progressbar(progress_window, length=200, mode='determinate')
        progress_bar.pack(pady=10)

        def update_progress(value):
            # Redraw right away; the event loop is busy during conversion.
            progress_bar['value'] = value
            progress_window.update_idletasks()

        succeeded = audio_to_funscript(
            audio_file, output_file, chunk_length, silence_thresh, max_thresh, update_progress
        )
        progress_window.destroy()
        if succeeded:
            messagebox.showinfo("Success", f"Funscript generated at:\n{output_file}")
            convert_button.config(state="disabled")
            current_audio_params = None

    except ValueError:
        messagebox.showerror("Error", "Please enter valid numbers")

# Create GUI
root = tk.Tk()
root.title("Audio to Funscript Tool")
root.geometry("400x500")
root.resizable(False, False)

# Result of the most recent analysis (includes the audio file path), or None
current_audio_params = None

# Title and description
title_label = tk.Label(root, text="Audio to Funscript", font=("Arial", 16, "bold"))
title_label.pack(pady=10)

info_label = tk.Label(
    root,
    text="Select an audio file to analyze parameters, then confirm to generate Funscript",
    font=("Arial", 10),
    wraplength=350,
)
info_label.pack(pady=5)

# Parameter input frame
param_frame = ttk.LabelFrame(root, text="Parameter Settings", padding=10)
param_frame.pack(pady=10, padx=10, fill="x")


def _add_param_entry(row, caption, default, hint):
    """Add a captioned, pre-filled entry with a tooltip to ``param_frame``."""
    tk.Label(param_frame, text=caption).grid(row=row, column=0, sticky="e", pady=5)
    entry = ttk.Entry(param_frame, width=10)
    entry.insert(0, default)
    entry.grid(row=row, column=1, pady=5, padx=5)
    ToolTip(entry, hint)
    return entry


chunk_entry = _add_param_entry(
    0,
    "Chunk Length (ms):",
    "50",
    "Recommended: 20-50 ms (rich detail)\n200-1000 ms (smooth actions)",
)
silence_entry = _add_param_entry(
    1,
    "Silence Threshold (dBFS):",
    "-50",
    "Recommended: -60 to -30\nVolume below this is considered silent",
)
max_entry = _add_param_entry(
    2,
    "Max Volume Threshold (dBFS):",
    "-3",
    "Recommended: -5 to 0\nVolume above this is considered maximum",
)

# Button frame
button_frame = ttk.Frame(root)
button_frame.pack(pady=20)

analyze_button = ttk.Button(
    button_frame,
    text="Analyze Audio and Recommend Parameters",
    command=analyze_and_update,
)
analyze_button.pack(pady=5)

# Starts disabled; analyze_and_update enables it after a successful analysis
convert_button = ttk.Button(
    button_frame,
    text="Confirm",
    command=confirm_and_convert,
    state="disabled",
)
convert_button.pack(pady=5)

# Footer note
footer_label = tk.Label(
    root,
    text="Tip: Analyze audio first, adjust parameters if needed, then click 'Confirm' to generate",
    font=("Arial", 8),
    fg="gray",
)
footer_label.pack(side="bottom", pady=10)

root.mainloop()


https://pixeldrain.com/u/NyxvWdDg
The script has been packaged into an EXE file. You can download it from the link above and use it if needed.

1 Like