refactor: Migrate MIDI correction tools to use PrettyMIDI library
Reworks the entire MIDI correction suite to operate on `PrettyMIDI` objects instead of the internal `escore` format.
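At a glance, the new suite chains five pretty_midi-based passes. A minimal usage sketch (function names as defined in app.py below; argument values are illustrative and the snippet assumes the functions are importable, which is not shown in this diff):

    import pretty_midi

    midi_obj = pretty_midi.PrettyMIDI("input.mid")
    # Correction order used in Render_MIDI: Filter -> Stabilize -> Simplify -> Quantize -> Velocity
    midi_obj = filter_spurious_notes_pm(midi_obj, max_dur_s=0.05, max_vel=20)
    midi_obj = stabilize_rhythm_pm(midi_obj, enable_segmentation=True, silence_threshold_s=1.0)
    midi_obj = simplify_rhythm_pm(midi_obj, simplification_level_str="1/8")
    midi_obj = quantize_pm(midi_obj, quantize_level_str="Auto-Analyze Rhythm")
    midi_obj = process_velocity_pm(midi_obj, mode=["Smooth", "Compress"])
    midi_obj.write("corrected.mid")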
app.py CHANGED

@@ -79,7 +79,7 @@ from basic_pitch import ICASSP_2022_MODEL_PATH
 # --- Imports for 8-bit Synthesizer & MIDI Merging ---
 import pretty_midi
 import numpy as np
-from scipy import signal
+from scipy import signal, stats
 
 # =================================================================================================
 # === Hugging Face SoundFont Downloader ===
@@ -153,18 +153,19 @@ class AppParameters:
     render_remove_drums: bool = False
 
     # EXPERIMENTAL: MIDI Post-Processing & Correction Tools
-    enable_midi_corrections: bool = False
-    correction_filter_spurious_notes: bool = True
-    correction_spurious_duration_ms: int = 50
-    correction_spurious_velocity: int = 20
-    correction_remove_abnormal_rhythm: bool = False
-    correction_rhythm_stab_by_segment: bool = False
-    correction_rhythm_stab_segment_silence_s: float = 1.0
-    correction_quantize_level: str = "None"
-    correction_velocity_mode: str = "None"
-    correction_velocity_smooth_factor: float = 0.5
-    correction_velocity_compress_min: int = 30
-    correction_velocity_compress_max: int = 100
+    enable_midi_corrections: bool = False # Master switch for enabling MIDI correction tools
+    correction_filter_spurious_notes: bool = True # Enable filtering of spurious (noise) notes
+    correction_spurious_duration_ms: int = 50 # Maximum duration (ms) for a note to be considered spurious
+    correction_spurious_velocity: int = 20 # Maximum velocity for a note to be considered spurious
+    correction_remove_abnormal_rhythm: bool = False # Enable rhythm stabilization for abnormal rhythm
+    correction_rhythm_stab_by_segment: bool = False # Enable segmentation by silence before rhythm stabilization
+    correction_rhythm_stab_segment_silence_s: float = 1.0 # Silence threshold (seconds) for segmenting MIDI
+    correction_quantize_level: str = "None" # Quantization level for note timing (e.g., "1/16", "None")
+    correction_velocity_mode: str = "None" # Velocity processing mode ("None", "Smooth", "Compress")
+    correction_velocity_smooth_factor: float = 0.5 # Smoothing factor for velocity processing
+    correction_velocity_compress_min: int = 30 # Minimum velocity after compression
+    correction_velocity_compress_max: int = 100 # Maximum velocity after compression
+    correction_rhythmic_simplification_level: str = "None" # Rhythmic simplification grid (e.g., "1/8"; "None" disables)
 
     # 8-bit Synthesizer Settings
     s8bit_waveform_type: str = 'Square'
@@ -230,277 +231,494 @@ class AppParameters:
     s8bit_delay_lowpass_cutoff_hz: int = 5000 # Lowpass filter frequency for delay echoes (removes harsh high frequencies from echoes)
     s8bit_delay_treble_pitch_shift: int = 0 # Pitch shift (in semitones) applied to high notes in delay echoes
 
-# =================================================================================================
-# === Helper Functions ===
-# =================================================================================================
-
-def quantize_escore(escore, bpm, quantize_level_str):
-    """
-    Snaps the start time of each note in an escore list to the nearest rhythmic grid line.
-
-    Args:
-        escore (list): The list of events.
-        bpm (float): The Beats Per Minute of the track.
-        quantize_level_str (str): The quantization level, e.g., "1/8", "1/16", "1/32".
-
-    Returns:
-        list: The quantized escore.
-    """
-    level_map = {
-        "1/4": 1.0,
-        "1/8": 2.0,
-        "1/12": 3.0, # 3 notes per beat
-        "1/16": 4.0,
-        "1/24": 6.0, # 6 notes per beat
-        "1/32": 8.0,
-        "1/64": 16.0
-    }
-    division = level_map.get(quantize_level_str)
-    if not division:
-        print(" - Invalid quantization level. Skipping.")
-        return escore
-
-    # Calculate the duration of a single grid step in milliseconds
-    grid_ms = (60000.0 / bpm) / division
-
-    quantized_escore = []
-    notes_quantized = 0
-    for event in escore:
-        original_start_time = event[0]
-        # The core quantization logic: find the nearest grid point
-        quantized_start_time = round(original_start_time / grid_ms) * grid_ms
-        event[0] = int(quantized_start_time)
-        notes_quantized += 1
-        quantized_escore.append(event)
-
-    print(f" - Quantized {notes_quantized} notes.")
-    return quantized_escore
-
-def filter_spurious_notes_escore(escore, max_dur_ms=50, max_vel=20):
-    """
-    Removes notes that are likely transcription noise: notes that are both very short and very quiet.
-
-    Args:
-        escore (list): The list of events.
-        max_dur_ms (int): Notes with duration shorter than this will be considered spurious.
-        max_vel (int): Notes with velocity lower than this will be considered spurious.
-
-    Returns:
-        list: The cleaned escore.
-    """
-    note_events = [note for note in escore if isinstance(note[0], (int, float))]
-    metadata_events = [meta for meta in escore if not isinstance(meta[0], (int, float))]
-
-    cleaned_notes = [
-        note for note in note_events
-        if not (note[1] < max_dur_ms and note[3] < max_vel)
-    ]
-
-    removed_count = len(note_events) - len(cleaned_notes)
-    print(f" - Removed {removed_count} spurious notes.")
-    final_escore = metadata_events + cleaned_notes
-    final_escore.sort(key=lambda event: event[1] if isinstance(event[0], str) else event[0])
-    return final_escore
-
-def process_velocity_escore(escore, mode="None", smooth_factor=0.5, compress_min=30, compress_max=100):
-    """
-    Smooths or compresses note velocities in an escore list.
-
-    Args:
-        escore (list): The list of events.
-        mode (str): "Smooth", "Compress", or "None".
-        smooth_factor (float): How much to blend with neighbors (0=none, 1=full average).
-        compress_min (int): The target minimum velocity for compression.
-        compress_max (int): The target maximum velocity for compression.
-    """
-    if mode == "None":
-        return escore
-
-    print(f" - Processing velocities with mode: {mode}...")
-
-    note_events = [note for note in escore if isinstance(note[0], (int, float))]
-    metadata_events = [meta for meta in escore if not isinstance(meta[0], (int, float))]
-
-    if not note_events:
-        return escore
-
-    velocities = [note[3] for note in note_events]
-
-    if mode == "Smooth":
-        new_velocities = list(velocities) # Start with a copy
-        # Iterate from the second to the second-to-last note
-        for i in range(1, len(velocities) - 1):
-            prev_vel = velocities[i-1]
-            current_vel = velocities[i]
-            next_vel = velocities[i+1]
-            neighbor_avg = (prev_vel + next_vel) / 2.0
-            # Blend the current velocity with the average of its neighbors
-            smoothed_vel = (current_vel * (1 - smooth_factor)) + (neighbor_avg * smooth_factor)
-            new_velocities[i] = int(max(1, min(127, smoothed_vel)))
-
-        for i, note in enumerate(note_events):
-            note[3] = new_velocities[i]
-        print(f" - Smoothed {len(note_events)} velocities.")
-
-    elif mode == "Compress":
-        min_vel_orig = min(velocities)
-        max_vel_orig = max(velocities)
-        if max_vel_orig == min_vel_orig:
-            return escore
-
-        for note in note_events:
-            # Linear mapping from original range to target range
-            original_vel = note[3]
-            new_vel = compress_min + (original_vel - min_vel_orig) * \
-                      (compress_max - compress_min) / (max_vel_orig - min_vel_orig)
-            note[3] = int(max(1, min(127, new_vel)))
-        print(f" - Compressed {len(note_events)} velocities to range [{compress_min}, {compress_max}].")
-
-    final_escore = metadata_events + note_events
-    final_escore.sort(key=lambda event: event[1] if isinstance(event[0], str) else event[0])
-    return final_escore
-
-def stabilize_midi_rhythm(escore,
-                          ioi_threshold_ratio=0.30,
-                          min_ioi_ms=30,
-                          enable_segmentation=True,
-                          silence_split_threshold_s=2.0):
-    """
-    Removes or merges rhythmically unstable notes from an escore list.
-    This is designed to clean up MIDI generated by basic-pitch with multiple pitch bends,
-    which can create clusters of very short, dense notes to approximate a slide.
-    This version can segment the MIDI based on silence before processing, making it robust
-    for files containing multiple songs with different tempos (like an album).
-
-    Args:
-        escore (list): The list of events.
-        ioi_threshold_ratio (float): Notes closer than this fraction of the median IOI are merged.
-        min_ioi_ms (int): Lower bound for the merge threshold, in milliseconds.
-        enable_segmentation (bool): Split the MIDI by silence before processing.
-        silence_split_threshold_s (float): Silence duration (seconds) that starts a new segment.
-    """
-    # 1. Separate notes from metadata
-    note_events = [note for note in escore if isinstance(note[0], (int, float))]
-    metadata_events = [meta for meta in escore if not isinstance(meta[0], (int, float))]
-
-    if not note_events:
-        return escore
-
-    # 2. Segment the notes by silence if enabled
-    segments = []
-    if enable_segmentation and len(note_events) > 1:
-        print(f" - Segmentation enabled (silence > {silence_split_threshold_s}s).")
-        current_segment = [note_events[0]]
-        silence_threshold_ms = silence_split_threshold_s * 1000
-
-        for i in range(1, len(note_events)):
-            prev_note_end_ms = note_events[i-1][0] + note_events[i-1][1]
-            current_note_start_ms = note_events[i][0]
-            gap_ms = current_note_start_ms - prev_note_end_ms
-
-            if gap_ms > silence_threshold_ms:
-                segments.append(current_segment)
-                current_segment = [note_events[i]]
-            else:
-                current_segment.append(note_events[i])
-        segments.append(current_segment)
-    else:
-        segments = [note_events]
-
-    # 3. Process each segment independently
-    all_cleaned_notes = []
-    total_merged_count = 0
-
-    for i, segment in enumerate(segments):
-        if len(segment) < 2:
-            all_cleaned_notes.extend(segment)
-            continue
-
-        # Calculate the median inter-onset interval (IOI) for this segment
-        iois = [segment[j][0] - segment[j-1][0] for j in range(1, len(segment))]
-        positive_iois = [ioi for ioi in iois if ioi > 0]
-        if not positive_iois:
-            all_cleaned_notes.extend(segment)
-            continue
-
-        median_ioi = np.median(positive_iois)
-        threshold_ms = max(median_ioi * ioi_threshold_ratio, min_ioi_ms)
-
-        cleaned_segment = [copy.deepcopy(segment[0])]
-        notes_merged_in_segment = 0
-
-        for current_note in segment[1:]:
-            last_kept_note = cleaned_segment[-1]
-            ioi = current_note[0] - last_kept_note[0]
-            if ioi < threshold_ms:
-                notes_merged_in_segment += 1
-                # Merge by extending the previous note's duration to cover the current note
-                new_end_time = current_note[0] + current_note[1]
-                last_kept_note[1] = new_end_time - last_kept_note[0]
-            else:
-                # Note is rhythmically stable, so we keep it
-                cleaned_segment.append(copy.deepcopy(current_note))
-
-        if len(segments) > 1:
-            print(f" - Segment {i+1}: Median IOI {median_ioi:.2f}ms, merged {notes_merged_in_segment} notes.")
-
-        all_cleaned_notes.extend(cleaned_segment)
-        total_merged_count += notes_merged_in_segment
-
-    if total_merged_count > 0:
-        print(f" - Rhythm stabilization complete. Total merged notes: {total_merged_count}.")
-
-    # 4. Recombine metadata with the globally cleaned notes and re-sort
-    final_escore = metadata_events + all_cleaned_notes
-
-    # Re-sort the entire list by time to ensure correct MIDI event order.
-    # The sort key must handle both event types: metadata time is at index 1, note time is at index 0.
-    final_escore.sort(key=lambda event: event[1] if isinstance(event[0], str) else event[0])
-
-    return final_escore
+# ===============================================================================
+# === MIDI CORRECTION SUITE (Operating on pretty_midi objects for robustness) ===
+# ===============================================================================
+
+def _get_all_notes(midi_obj: pretty_midi.PrettyMIDI, include_drums=False):
+    """Helper to get a single sorted list of all notes from all instruments."""
+    all_notes = []
+    for instrument in midi_obj.instruments:
+        if not instrument.is_drum or include_drums:
+            all_notes.extend(instrument.notes)
+    all_notes.sort(key=lambda x: x.start)
+    return all_notes
+
+def _normalize_instrument_times(instrument: pretty_midi.Instrument):
+    """Creates a temporary, normalized copy of an instrument whose timestamps start from 0."""
+    if not instrument.notes:
+        return instrument
+
+    # Sort notes by start time to reliably get the first note
+    notes = sorted(instrument.notes, key=lambda x: x.start)
+    start_offset = notes[0].start
+
+    normalized_instrument = copy.deepcopy(instrument)
+    for note in normalized_instrument.notes:
+        note.start -= start_offset
+        note.end -= start_offset
+    return normalized_instrument
+
+def _segment_midi_by_silence(midi_obj: pretty_midi.PrettyMIDI, silence_threshold_s=1.0):
+    """
+    Splits a PrettyMIDI object into a list of PrettyMIDI objects, each representing a segment.
+    This is the core of per-song processing for albums.
+    """
+    all_notes = _get_all_notes(midi_obj, include_drums=True)
+    if not all_notes:
+        return []
+
+    segments = []
+    current_segment_notes = {i: [] for i in range(len(midi_obj.instruments))}
+
+    # Add the very first note to the first segment
+    for i, inst in enumerate(midi_obj.instruments):
+        for note in inst.notes:
+            if note == all_notes[0]:
+                current_segment_notes[i].append(note)
+                break
+
+    for i in range(1, len(all_notes)):
+        prev_note_end = all_notes[i-1].end
+        current_note_start = all_notes[i].start
+        gap = current_note_start - prev_note_end
+
+        if gap > silence_threshold_s:
+            # End of a segment, create a new MIDI object for it
+            segment_midi = pretty_midi.PrettyMIDI()
+            for inst_idx, inst_notes in current_segment_notes.items():
+                if inst_notes:
+                    new_inst = pretty_midi.Instrument(program=midi_obj.instruments[inst_idx].program, is_drum=midi_obj.instruments[inst_idx].is_drum)
+                    new_inst.notes.extend(inst_notes)
+                    segment_midi.instruments.append(new_inst)
+            if segment_midi.instruments:
+                segments.append(segment_midi)
+            # Start a new segment
+            current_segment_notes = {i: [] for i in range(len(midi_obj.instruments))}
+
+        # Find which instrument this note belongs to and add it
+        for inst_idx, inst in enumerate(midi_obj.instruments):
+            if all_notes[i] in inst.notes:
+                current_segment_notes[inst_idx].append(all_notes[i])
+                break
+
+    # Add the final segment
+    final_segment_midi = pretty_midi.PrettyMIDI()
+    for inst_idx, inst_notes in current_segment_notes.items():
+        if inst_notes:
+            new_inst = pretty_midi.Instrument(program=midi_obj.instruments[inst_idx].program, is_drum=midi_obj.instruments[inst_idx].is_drum)
+            new_inst.notes.extend(inst_notes)
+            final_segment_midi.instruments.append(new_inst)
+    if final_segment_midi.instruments:
+        segments.append(final_segment_midi)
+
+    return segments
+
+def _recombine_segments(segments):
+    """Merges a list of segmented PrettyMIDI objects back into one."""
+    recombined_midi = pretty_midi.PrettyMIDI()
+    # Create instrument tracks in the final MIDI object
+    if segments:
+        template_midi = segments[0]
+        for i, inst in enumerate(template_midi.instruments):
+            recombined_midi.instruments.append(pretty_midi.Instrument(program=inst.program, is_drum=inst.is_drum))
+
+    # Populate the tracks with notes from all segments
+    for segment in segments:
+        for i, inst in enumerate(segment.instruments):
+            # This assumes instrument order is consistent, which our segmentation function ensures
+            recombined_midi.instruments[i].notes.extend(inst.notes)
+
+    return recombined_midi
+
+def _analyze_best_quantize_level(notes, bpm, error_threshold_ratio=0.25):
+    """Analyzes a list of notes to determine the most likely quantization grid."""
+    if not notes: return "None"
+    grids_to_test = ["1/8", "1/12", "1/16", "1/24", "1/32"]
+    level_map = {"1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0}
+    start_times = [n.start for n in notes]
+    results = []
+    for grid_name in grids_to_test:
+        division = level_map[grid_name]
+        grid_s = (60.0 / bpm) / division
+        if grid_s < 0.001: continue
+        total_error = sum(min(t % grid_s, grid_s - (t % grid_s)) for t in start_times)
+        avg_error = total_error / len(start_times)
+        results.append({"grid": grid_name, "avg_error": avg_error, "grid_s": grid_s})
+    if not results: return "None"
+    best_fit = min(results, key=lambda x: x['avg_error'])
+    # Reject the best grid if notes still sit too far from it on average
+    if best_fit['avg_error'] > best_fit['grid_s'] * error_threshold_ratio:
+        return "None"
+    return best_fit['grid']
+
+def filter_spurious_notes_pm(midi_obj: pretty_midi.PrettyMIDI, max_dur_s=0.05, max_vel=20):
+    """Filters out very short and quiet notes from a PrettyMIDI object."""
+    print(f" - Filtering spurious notes (duration < {max_dur_s*1000:.0f}ms AND velocity < {max_vel})...")
+    notes_removed = 0
+    for instrument in midi_obj.instruments:
+        original_note_count = len(instrument.notes)
+        instrument.notes = [
+            note for note in instrument.notes
+            if not (note.end - note.start < max_dur_s and note.velocity < max_vel)
+        ]
+        notes_removed += original_note_count - len(instrument.notes)
+
+    print(f" - Removed {notes_removed} spurious notes.")
+    return midi_obj
+
+def stabilize_rhythm_pm(
+    midi_obj: pretty_midi.PrettyMIDI,
+    ioi_threshold_ratio=0.30,
+    min_ioi_s=0.03,
+    enable_segmentation=True,
+    silence_threshold_s=1.0,
+    merge_mode="extend", # "extend" or "drop"
+    consider_velocity=True, # consider low-velocity notes as decorations
+    skip_chords=True, # skip merging if multiple notes start at the same time
+    use_mode_ioi=False # use the mode of the IOIs instead of the median
+):
+    """Enhances rhythm stability by merging rhythmically unstable notes, with advanced options."""
+    print(" - Stabilizing rhythm...")
+    if not enable_segmentation:
+        segments = [midi_obj]
+    else:
+        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
+        if len(segments) > 1:
+            print(f" - Split into {len(segments)} segments for stabilization.")
+
+    processed_segments = []
+
+    for segment in segments:
+        for instrument in segment.instruments:
+            if instrument.is_drum or len(instrument.notes) < 20:
+                continue
+
+            notes = sorted(instrument.notes, key=lambda n: n.start)
+
+            # Compute inter-onset intervals (IOIs)
+            iois = [notes[i].start - notes[i-1].start for i in range(1, len(notes))]
+            positive_iois = [ioi for ioi in iois if ioi > 0.001]
+            if not positive_iois:
+                continue
+
+            # Determine threshold based on median or mode
+            if use_mode_ioi:
+                try:
+                    median_ioi = float(stats.mode(positive_iois).mode[0])
+                except Exception:
+                    median_ioi = np.median(positive_iois)
+            else:
+                median_ioi = np.median(positive_iois)
+            threshold_s = max(median_ioi * ioi_threshold_ratio, min_ioi_s)
+
+            cleaned_notes = [notes[0]]
+            for i in range(1, len(notes)):
+                prev_note = cleaned_notes[-1]
+                curr_note = notes[i]
+
+                # Skip merging if chord and option enabled
+                if skip_chords:
+                    notes_at_same_time = [n for n in notes if abs(n.start - curr_note.start) < 0.001]
+                    if len(notes_at_same_time) > 1:
+                        cleaned_notes.append(curr_note)
+                        continue
+
+                # Check if note is considered "unstable/decoration"
+                pitch_close = abs(curr_note.pitch - prev_note.pitch) <= 3 # within a minor third
+                velocity_ok = True
+                if consider_velocity:
+                    velocity_ok = curr_note.velocity < prev_note.velocity * 0.8
+
+                start_close = (curr_note.start - prev_note.start) < threshold_s
+
+                if start_close and pitch_close and velocity_ok:
+                    if merge_mode == "extend":
+                        # Merge by extending previous note's end
+                        prev_note.end = max(prev_note.end, curr_note.end)
+                    elif merge_mode == "drop":
+                        # Drop the current note
+                        continue
+                else:
+                    cleaned_notes.append(curr_note)
+
+            instrument.notes = cleaned_notes
+        processed_segments.append(segment)
+
+    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
+
+
+def simplify_rhythm_pm(
+    midi_obj: pretty_midi.PrettyMIDI,
+    simplification_level_str="None",
+    enable_segmentation=True,
+    silence_threshold_s=1.0,
+    keep_chords=True,
+    max_notes_per_grid=3
+):
+    """Simplifies rhythm while preserving music length, with optional chord and sustain handling."""
+    if simplification_level_str == "None":
+        return midi_obj
+    print(f" - Simplifying rhythm to {simplification_level_str} grid...")
+
+    # Split into segments if enabled
+    if not enable_segmentation:
+        segments = [midi_obj]
+    else:
+        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
+        if len(segments) > 1:
+            print(f" - Split into {len(segments)} segments for simplification.")
+
+    processed_segments = []
+    level_map = {"1/4": 1.0, "1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0, "1/64": 16.0}
+    division = level_map.get(simplification_level_str)
+    if not division:
+        return midi_obj
+
+    for segment in segments:
+        new_segment_midi = pretty_midi.PrettyMIDI()
+        for instrument in segment.instruments:
+            if instrument.is_drum or not instrument.notes:
+                new_segment_midi.instruments.append(instrument)
+                continue
+
+            try:
+                # Prefer using tempo changes from the MIDI if available
+                if segment.get_tempo_changes()[1].size > 0:
+                    bpm = float(segment.get_tempo_changes()[1][0])
+                else:
+                    temp_norm_inst = _normalize_instrument_times(instrument)
+                    temp_midi = pretty_midi.PrettyMIDI()
+                    temp_midi.instruments.append(temp_norm_inst)
+                    bpm = temp_midi.estimate_tempo()
+                bpm = max(40.0, min(bpm, 240.0))
+            except Exception:
+                new_segment_midi.instruments.append(instrument)
+                continue
+
+            grid_s = (60.0 / bpm) / division
+            if grid_s <= 0.001:
+                new_segment_midi.instruments.append(instrument)
+                continue
+
+            simplified_instrument = pretty_midi.Instrument(program=instrument.program, name=instrument.name)
+            notes = sorted(instrument.notes, key=lambda x: x.start)
+            end_time = segment.get_end_time()
+
+            # Handle sustain pedal CC64 events
+            sustain_times = []
+            for cc in instrument.control_changes:
+                if cc.number == 64: # sustain pedal
+                    sustain_times.append((cc.time, cc.value >= 64))
+
+            # Grid iteration
+            current_grid_time = round(notes[0].start / grid_s) * grid_s
+            while current_grid_time < end_time:
+                notes_in_slot = [n for n in notes if current_grid_time <= n.start < current_grid_time + grid_s]
+                if notes_in_slot:
+                    chosen_notes = []
+                    if keep_chords:
+                        # Always keep root (lowest pitch) and top note (highest pitch)
+                        root_note = min(notes_in_slot, key=lambda n: n.pitch)
+                        top_note = max(notes_in_slot, key=lambda n: n.pitch)
+                        chosen_notes.extend([root_note, top_note])
+                        # Also keep the strongest note (highest velocity)
+                        strong_note = max(notes_in_slot, key=lambda n: n.velocity)
+                        if strong_note not in chosen_notes:
+                            chosen_notes.append(strong_note)
+                        # Limit chord density
+                        chosen_notes = sorted(set(chosen_notes), key=lambda n: n.pitch)[:max_notes_per_grid]
+                    else:
+                        chosen_notes = [max(notes_in_slot, key=lambda n: n.velocity)]
+
+                    for note in chosen_notes:
+                        # End is either original note end or grid boundary
+                        note_end = min(note.end, current_grid_time + grid_s)
+                        # Extend if sustain pedal is active
+                        for t, active in sustain_times:
+                            if t >= note.start and active:
+                                note_end = max(note_end, current_grid_time + grid_s * 2)
+                        simplified_instrument.notes.append(pretty_midi.Note(
+                            velocity=note.velocity,
+                            pitch=note.pitch,
+                            start=current_grid_time,
+                            end=note_end
+                        ))
+                current_grid_time += grid_s
+
+            if simplified_instrument.notes:
+                new_segment_midi.instruments.append(simplified_instrument)
+        processed_segments.append(new_segment_midi)
+
+    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
+
+
+def quantize_pm(
+    midi_obj: pretty_midi.PrettyMIDI,
+    quantize_level_str="None",
+    enable_segmentation=True,
+    silence_threshold_s=1.0,
+    quantize_end=True,
+    preserve_duration=True
+):
+    """Quantizes notes in a PrettyMIDI object with optional end-time adjustment, sustain handling, and segmentation support."""
+    if quantize_level_str == "None":
+        return midi_obj
+    print(f" - Quantizing notes (Mode: {quantize_level_str})...")
+
+    # Split into segments if enabled
+    if not enable_segmentation:
+        segments = [midi_obj]
+    else:
+        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
+        if len(segments) > 1:
+            print(f" - Split into {len(segments)} segments for quantization.")
+
+    processed_segments = []
+    level_map = {"1/4": 1.0, "1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0, "1/64": 16.0}
+
+    for i, segment in enumerate(segments):
+        new_segment_midi = pretty_midi.PrettyMIDI()
+        for instrument in segment.instruments:
+            if instrument.is_drum or not instrument.notes:
+                new_segment_midi.instruments.append(instrument)
+                continue
+            try:
+                # Estimate BPM or use the first tempo change
+                if segment.get_tempo_changes()[1].size > 0:
+                    bpm = float(segment.get_tempo_changes()[1][0])
+                else:
+                    temp_norm_inst = _normalize_instrument_times(instrument)
+                    temp_midi = pretty_midi.PrettyMIDI()
+                    temp_midi.instruments.append(temp_norm_inst)
+                    bpm = temp_midi.estimate_tempo()
+                bpm = max(40.0, min(bpm, 240.0))
+            except Exception:
+                new_segment_midi.instruments.append(instrument)
+                continue
+
+            # Determine quantization grid size
+            final_quantize_level = quantize_level_str
+            if quantize_level_str == "Auto-Analyze Rhythm":
+                final_quantize_level = _analyze_best_quantize_level(instrument.notes, bpm)
+                if len(segments) > 1:
+                    print(f" - Segment {i+1}, Inst '{instrument.name}': Auto-analyzed grid is '{final_quantize_level}'. BPM: {bpm:.2f}")
+
+            division = level_map.get(final_quantize_level)
+            if not division:
+                new_segment_midi.instruments.append(instrument)
+                continue
+            grid_s = (60.0 / bpm) / division
+
+            # Handle sustain pedal CC64
+            sustain_times = []
+            for cc in instrument.control_changes:
+                if cc.number == 64: # sustain pedal
+                    sustain_times.append((cc.time, cc.value >= 64))
+
+            # Quantize notes
+            quantized_instrument = pretty_midi.Instrument(program=instrument.program, name=instrument.name)
+            for note in instrument.notes:
+                original_duration = note.end - note.start
+                # Quantize start
+                new_start = round(note.start / grid_s) * grid_s
+                if preserve_duration:
+                    new_end = new_start + original_duration
+                elif quantize_end:
+                    new_end = round(note.end / grid_s) * grid_s
+                else:
+                    new_end = note.end
+
+                # Sustain pedal extension
+                for t, active in sustain_times:
+                    if t >= note.start and active:
+                        new_end = max(new_end, new_start + grid_s * 2)
+
+                # Safety check
+                if new_end <= new_start:
+                    new_end = new_start + grid_s * 0.5
+
+                quantized_instrument.notes.append(pretty_midi.Note(
+                    velocity=note.velocity,
+                    pitch=note.pitch,
+                    start=new_start,
+                    end=new_end
+                ))
+
+            new_segment_midi.instruments.append(quantized_instrument)
+        processed_segments.append(new_segment_midi)
+
+    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
+
+
+def process_velocity_pm(
+    midi_obj: pretty_midi.PrettyMIDI,
+    mode=["None"], # list of modes: "Smooth", "Compress"
+    smooth_factor=0.5, # weight for smoothing
+    compress_min=30,
+    compress_max=100,
+    compress_type="linear", # "linear" or "perceptual"
+    inplace=True # if False, return a copy
+):
+    """Applies velocity processing to a PrettyMIDI object with smoothing and/or compression."""
+    if not inplace:
+        midi_obj = copy.deepcopy(midi_obj)
+
+    if isinstance(mode, str):
+        mode = [mode]
+    if "None" in mode or not mode:
+        return midi_obj
+
+    print(f" - Processing velocities (Mode: {mode})...")
+
+    for instrument in midi_obj.instruments:
+        if instrument.is_drum or not instrument.notes:
+            continue
+
+        velocities = [n.velocity for n in instrument.notes]
+
+        # Smooth velocity (requires at least two notes so every note has a neighbor)
+        if "Smooth" in mode and len(velocities) > 1:
+            new_velocities = list(velocities)
+            n_notes = len(velocities)
+            for i in range(n_notes):
+                if i == 0:
+                    neighbor_avg = velocities[i+1]
+                elif i == n_notes - 1:
+                    neighbor_avg = velocities[i-1]
+                else:
+                    neighbor_avg = (velocities[i-1] + velocities[i+1]) / 2.0
+                smoothed_vel = velocities[i] * (1 - smooth_factor) + neighbor_avg * smooth_factor
+                new_velocities[i] = int(max(1, min(127, smoothed_vel)))
+            for i, note in enumerate(instrument.notes):
+                note.velocity = new_velocities[i]
+
+        # Compress velocity
+        if "Compress" in mode:
+            velocities = [n.velocity for n in instrument.notes] # updated if smoothed first
+            min_vel, max_vel = min(velocities), max(velocities)
+            if max_vel == min_vel:
+                continue
+
+            for note in instrument.notes:
+                if compress_type == "linear":
+                    new_vel = compress_min + (note.velocity - min_vel) * (compress_max - compress_min) / (max_vel - min_vel)
+                elif compress_type == "perceptual":
+                    # Simple gamma-style perceptual compression
+                    norm = (note.velocity - min_vel) / (max_vel - min_vel)
+                    gamma = 0.6 # perceptual curve
+                    new_vel = compress_min + ((norm ** gamma) * (compress_max - compress_min))
+                else:
+                    new_vel = note.velocity
+                note.velocity = int(max(1, min(127, new_vel)))
+
+    return midi_obj
+
+# =================================================================================================
+# === Helper Functions ===
+# =================================================================================================
 
 def analyze_audio_for_adaptive_params(audio_data: np.ndarray, sample_rate: int):
     """
@@ -1991,9 +2209,85 @@ def Render_MIDI(*, input_midi_path: str, params: AppParameters, progress: gr.Pro
     print(f"Render type: {params.render_type}")
     print(f"Soundfont bank: {params.soundfont_bank}")
     print(f"Audio render sample rate: {params.render_sample_rate}")
-    # ... (add other print statements for settings if needed)
     print('=' * 70)
+
+    ##################################
+
+    # --- FLOW STEP 1: Apply MIDI Post-Processing & Correction Suite ---
+    if getattr(params, 'enable_midi_corrections', False):
+        print("Applying MIDI Post-Processing & Corrections (on pretty_midi object)...")
+
+        # --- FLOW STEP 2: Load into pretty_midi for corrections ---
+        try:
+            midi_obj = pretty_midi.PrettyMIDI(io.BytesIO(fdata))
+            print("Successfully loaded MIDI into pretty_midi for corrections.")
+        except Exception as e:
+            print(f"Fatal Error: Could not load the input MIDI with pretty_midi. Cannot proceed. Error: {e}")
+            return ("N/A", fn1, f"MIDI file is corrupted or in an unsupported format. Error: {e}", None, None, None, "MIDI Load Error")
+
+        # Get common segmentation parameters
+        enable_segmentation = getattr(params, 'correction_rhythm_stab_by_segment', True)
+        silence_threshold_s = getattr(params, 'correction_rhythm_stab_segment_silence_s', 1.0)
+
+        # Correction Order: Filter -> Stabilize -> Simplify -> Quantize -> Velocity
+
+        # 1. Filter spurious notes (does not need segmentation)
+        if getattr(params, 'correction_filter_spurious_notes', False):
+            midi_obj = filter_spurious_notes_pm(
+                midi_obj,
+                max_dur_s=getattr(params, 'correction_spurious_duration_ms', 50) / 1000.0,
+                max_vel=getattr(params, 'correction_spurious_velocity', 20)
+            )
+
+        # 2. Stabilize rhythm
+        if getattr(params, 'correction_remove_abnormal_rhythm', False):
+            midi_obj = stabilize_rhythm_pm(
+                midi_obj,
+                enable_segmentation=enable_segmentation,
+                silence_threshold_s=silence_threshold_s
+            )
+
+        # 3. Simplify rhythm
+        simplification_level = getattr(params, 'correction_rhythmic_simplification_level', "None")
+        if simplification_level != "None":
+            midi_obj = simplify_rhythm_pm(
+                midi_obj,
+                simplification_level_str=simplification_level,
+                enable_segmentation=enable_segmentation,
+                silence_threshold_s=silence_threshold_s
+            )
+
+        # 4. Quantize rhythm
+        quantize_level = getattr(params, 'correction_quantize_level', "None")
+        if quantize_level != "None":
+            midi_obj = quantize_pm(
+                midi_obj,
+                quantize_level_str=quantize_level,
+                enable_segmentation=enable_segmentation,
+                silence_threshold_s=silence_threshold_s
+            )
+
+        # 5. Process velocity (does not need segmentation)
+        velocity_mode = getattr(params, 'correction_velocity_mode', "None")
+        if velocity_mode != "None":
+            midi_obj = process_velocity_pm(
+                midi_obj,
+                mode=[velocity_mode],
+                smooth_factor=getattr(params, 'correction_velocity_smooth_factor', 0.5),
+                compress_min=getattr(params, 'correction_velocity_compress_min', 30),
+                compress_max=getattr(params, 'correction_velocity_compress_max', 100)
+            )
+
+        # --- FLOW STEP 3: Convert the corrected pretty_midi object back to binary data ---
+        corrected_midi_io = io.BytesIO()
+        midi_obj.write(corrected_midi_io)
+        fdata = corrected_midi_io.getvalue()
+
+        print("Corrections finished.")
+        print('=' * 70)
+
+    ##################################
 
     # --- MIDI Processing using TMIDIX ---
     print('Processing MIDI... Please wait...')
     raw_score = MIDI.midi2single_track_ms_score(fdata)
@@ -2140,53 +2434,6 @@ def Render_MIDI(*, input_midi_path: str, params: AppParameters, progress: gr.Pro
             o[1] *= 200
             o[2] *= 200
 
-    # --- MIDI Post-Processing & Correction Block ---
-    if getattr(params, 'enable_midi_corrections', False):
-        print("Applying MIDI Post-Processing & Corrections...")
-
-        # Filter spurious notes first to clean the data for other processes
-        if getattr(params, 'correction_filter_spurious_notes', False):
-            output_score = filter_spurious_notes_escore(
-                output_score,
-                max_dur_ms=getattr(params, 'correction_spurious_duration_ms', 50),
-                max_vel=getattr(params, 'correction_spurious_velocity', 20)
-            )
-
-        # Then, stabilize rhythm on the cleaned notes
-        if getattr(params, 'correction_remove_abnormal_rhythm', False):
-            output_score = stabilize_midi_rhythm(
-                output_score,
-                enable_segmentation=getattr(params, 'correction_rhythm_stab_by_segment', False),
-                silence_split_threshold_s=getattr(params, 'correction_rhythm_stab_segment_silence_s', 1.0)
-            )
-
-        # Then, quantize the stabilized rhythm
-        quantize_level = getattr(params, 'correction_quantize_level', "None")
-        if quantize_level != "None":
-            try:
-                # We need to get the BPM for quantization. We do this once here.
-                midi_obj_for_bpm = pretty_midi.PrettyMIDI(input_midi_path)
-                estimated_bpm = midi_obj_for_bpm.estimate_tempo()
-                output_score = quantize_escore(output_score, estimated_bpm, quantize_level)
-            except Exception as e:
-                print(f" - Could not estimate BPM for quantization. Skipping. Error: {e}")
-
-        # Finally, process velocity as it doesn't affect timing or notes
-        velocity_mode = getattr(params, 'correction_velocity_mode', "None")
-        if velocity_mode != "None":
-            output_score = process_velocity_escore(
-                output_score,
-                mode=velocity_mode,
-                smooth_factor=getattr(params, 'correction_velocity_smooth_factor', 0.5),
-                compress_min=getattr(params, 'correction_velocity_compress_min', 30),
-                compress_max=getattr(params, 'correction_velocity_compress_max', 100)
-            )
-        print("Corrections finished.")
-        print('=' * 70)
-
-    print('Final adjustments complete.')
-    print('=' * 70)
-
     # --- Saving Processed MIDI File ---
     # Save the transformed MIDI data
     SONG, patches, _ = TMIDIX.patch_enhanced_score_notes(output_score)
@@ -4091,18 +4338,26 @@ if __name__ == "__main__":
                 correction_remove_abnormal_rhythm = gr.Checkbox(label="Stabilize Rhythm (for Pitch Bend)", value=False,
                     info="Attempts to merge overly dense, rhythmically unstable notes often created when 'Allow Multiple Pitch Bends' is used. This can clean up the rhythm but may lose some pitch slide nuance.")
                 with gr.Group(visible=False) as rhythm_stab_options: # This group is initially hidden
-                    correction_rhythm_stab_by_segment = gr.Checkbox(label="Enable Segmentation by Silence", value=False,
+                    correction_rhythm_stab_by_segment = gr.Checkbox(label="Enable Segmentation by Silence", value=True,
                         info="Highly recommended for albums or long files. Splits the MIDI by silent parts before stabilizing rhythm, ensuring accuracy for songs with different tempos.")
                     correction_rhythm_stab_segment_silence_s = gr.Slider(minimum=0.5, maximum=10.0, value=1.0, step=0.5,
                         label="Silence Threshold for Segmentation (seconds)",
                         info="The amount of silence required to start a new segment. 1-3 seconds is usually enough to separate songs on an album.")
+                # --- Rhythmic Simplification Group ---
+                with gr.Group():
+                    correction_rhythmic_simplification_level = gr.Dropdown(
+                        ["None", "1/16", "1/12", "1/8", "1/4"],
+                        value="None",
+                        label="Simplify Rhythm (Experimental)",
+                        info="WARNING: This is a destructive process that removes notes to slow down the rhythm. Select a target grid; only the most important note within each grid cell will be kept and its duration extended."
+                    )
                 # --- Quantization Group ---
                 with gr.Group():
                     correction_quantize_level = gr.Dropdown(
-                        ["None", "1/64", "1/32", "1/16", "1/8", "1/4", "1/24", "1/12"],
+                        ["None", "Auto-Analyze Rhythm", "1/64", "1/32", "1/16", "1/8", "1/4", "1/24", "1/12"],
                         value="None",
                         label="Quantize Rhythm",
-                        info="Quantizes notes to the nearest rhythmic grid line. '1/16' is recommended for most pop and rock music. For expressive genres like classical or jazz, use with caution as it may reduce natural timing nuances. Straight divisions (1/8, 1/16, etc.) suit most modern music, while swing divisions (1/12, 1/24) are ideal for jazz, blues, or shuffle styles."
+                        info="Quantizes notes to the nearest rhythmic grid line. '1/16' is recommended for most pop and rock music. For expressive genres like classical or jazz, use with caution as it may reduce natural timing nuances. Straight divisions (1/8, 1/16, etc.) suit most modern music, while swing divisions (1/12, 1/24) are ideal for jazz, blues, or shuffle styles. 'Auto-Analyze' is highly recommended for albums or files with mixed tempos, as it will automatically determine the best grid (straight or swing) for each song segment."
                     )
                 # --- Velocity Processing Group ---
                 with gr.Group():