feat: Implement advanced MIDI experimental correction tools
Adds a new suite of post-processing tools to allow for advanced refinement of transcribed MIDI files.
New features include:
- Spurious (noise) note filtering based on duration and velocity
- Rhythm Stabilization with silence-based segmentation
- Rhythmic Quantization for both straight and swing/triplet feels
- Velocity processing with Smoothing and Compression modes
app.py
CHANGED
|
@@ -152,6 +152,20 @@ class AppParameters:
|
|
| 152 |
render_output_as_solo_piano: bool = False
|
| 153 |
render_remove_drums: bool = False
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
# 8-bit Synthesizer Settings
|
| 156 |
s8bit_waveform_type: str = 'Square'
|
| 157 |
s8bit_pulse_width: float = 0.5
|
|
@@ -209,7 +223,7 @@ class AppParameters:
|
|
| 209 |
s8bit_delay_division: str = "Dotted 8th Note"
|
| 210 |
s8bit_delay_feedback: float = 0.5 # Velocity scale for each subsequent echo (50%)
|
| 211 |
s8bit_delay_repeats: int = 3 # Number of echoes to generate
|
| 212 |
-
# ---
|
| 213 |
s8bit_delay_highpass_cutoff_hz: int = 100 # High-pass filter frequency for delay echoes (removes low-end rumble from echoes)
|
| 214 |
s8bit_delay_bass_pitch_shift: int = 0 # Pitch shift (in semitones) applied to low notes in delay echoes
|
| 215 |
# --- High-End Management for Delay ---
|
|
@@ -220,6 +234,274 @@ class AppParameters:
|
|
| 220 |
# === Helper Functions ===
|
| 221 |
# =================================================================================================
|
| 222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
def analyze_audio_for_adaptive_params(audio_data: np.ndarray, sample_rate: int):
|
| 224 |
"""
|
| 225 |
Analyzes raw audio data to dynamically determine optimal parameters for basic-pitch.
|
|
@@ -1858,6 +2140,50 @@ def Render_MIDI(*, input_midi_path: str, params: AppParameters, progress: gr.Pro
|
|
| 1858 |
o[1] *= 200
|
| 1859 |
o[2] *= 200
|
| 1860 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1861 |
print('Final adjustments complete.')
|
| 1862 |
print('=' * 70)
|
| 1863 |
|
|
@@ -3484,6 +3810,15 @@ if __name__ == "__main__":
|
|
| 3484 |
updates[enable_advanced_separation] = gr.update(visible=is_demucs, value=False)
|
| 3485 |
return updates
|
| 3486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3487 |
# --- Use the dataclass to define the master list of parameter keys ---
|
| 3488 |
# This is now the single source of truth for parameter order.
|
| 3489 |
ALL_PARAM_KEYS = [field.name for field in fields(AppParameters) if field.name not in ["input_file", "batch_input_files"]]
|
|
@@ -3726,6 +4061,67 @@ if __name__ == "__main__":
|
|
| 3726 |
info="Quantizes the score to a fixed bar length. 'Start Times' aligns onsets. "
|
| 3727 |
"'Durations' trims notes at the bar line. 'Split Durations' splits notes that cross the bar line."
|
| 3728 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3729 |
|
| 3730 |
with gr.Column(scale=1):
|
| 3731 |
# --- 8-bit Synthesizer Settings ---
|
|
@@ -4134,7 +4530,7 @@ if __name__ == "__main__":
|
|
| 4134 |
s8bit_ui_keys = [key for key in ALL_PARAM_KEYS if key.startswith('s8bit_')]
|
| 4135 |
s8bit_ui_components = [ui_component_map[key] for key in s8bit_ui_keys]
|
| 4136 |
|
| 4137 |
-
#
|
| 4138 |
s8bit_control_components = [comp for comp in s8bit_ui_components if comp != s8bit_preset_selector]
|
| 4139 |
|
| 4140 |
# The list of basic_pitch UI components that can be updated by its preset selector.
|
|
@@ -4276,6 +4672,23 @@ if __name__ == "__main__":
|
|
| 4276 |
inputs=s8bit_enable_delay,
|
| 4277 |
outputs=delay_settings_box
|
| 4278 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4279 |
|
| 4280 |
# Launch the Gradio app
|
| 4281 |
app.queue().launch(inbrowser=True, debug=True)
|
|
|
|
| 152 |
render_output_as_solo_piano: bool = False
|
| 153 |
render_remove_drums: bool = False
|
| 154 |
|
| 155 |
+
# EXPERIMENTAL: MIDI Post-Processing & Correction Tools
|
| 156 |
+
enable_midi_corrections: bool = False # Master switch for enabling MIDI correction tools
|
| 157 |
+
correction_filter_spurious_notes: bool = True # Enable filtering of spurious (noise) notes
|
| 158 |
+
correction_spurious_duration_ms: int = 50 # Maximum duration (ms) for a note to be considered spurious
|
| 159 |
+
correction_spurious_velocity: int = 20 # Maximum velocity for a note to be considered spurious
|
| 160 |
+
correction_remove_abnormal_rhythm: bool = False # Enable rhythm stabilization for abnormal rhythm
|
| 161 |
+
correction_rhythm_stab_by_segment: bool = False # Enable segmentation by silence before rhythm stabilization
|
| 162 |
+
correction_rhythm_stab_segment_silence_s: float = 1.0 # Silence threshold (seconds) for segmenting MIDI
|
| 163 |
+
correction_quantize_level: str = "None" # Quantization level for note timing (e.g., "1/16", "None")
|
| 164 |
+
correction_velocity_mode: str = "None" # Velocity processing mode ("None", "Smooth", "Compress")
|
| 165 |
+
correction_velocity_smooth_factor: float = 0.5 # Smoothing factor for velocity processing
|
| 166 |
+
correction_velocity_compress_min: int = 30 # Minimum velocity after compression
|
| 167 |
+
correction_velocity_compress_max: int = 100 # Maximum velocity after compression
|
| 168 |
+
|
| 169 |
# 8-bit Synthesizer Settings
|
| 170 |
s8bit_waveform_type: str = 'Square'
|
| 171 |
s8bit_pulse_width: float = 0.5
|
|
|
|
| 223 |
s8bit_delay_division: str = "Dotted 8th Note"
|
| 224 |
s8bit_delay_feedback: float = 0.5 # Velocity scale for each subsequent echo (50%)
|
| 225 |
s8bit_delay_repeats: int = 3 # Number of echoes to generate
|
| 226 |
+
# --- Low-End Management for Delay ---
|
| 227 |
s8bit_delay_highpass_cutoff_hz: int = 100 # High-pass filter frequency for delay echoes (removes low-end rumble from echoes)
|
| 228 |
s8bit_delay_bass_pitch_shift: int = 0 # Pitch shift (in semitones) applied to low notes in delay echoes
|
| 229 |
# --- High-End Management for Delay ---
|
|
|
|
| 234 |
# === Helper Functions ===
|
| 235 |
# =================================================================================================
|
| 236 |
|
| 237 |
+
|
| 238 |
+
def quantize_escore(escore, bpm, quantize_level_str="1/16"):
    """
    Snap the onset of every note event in an escore onto a rhythmic grid.

    Metadata events (whose first element is a string) pass through untouched;
    note events have their start time (index 0) rounded to the nearest grid
    point derived from the tempo and the requested subdivision.

    Args:
        escore (list): Mixed list of note events and metadata events.
        bpm (float): Tempo of the track in beats per minute.
        quantize_level_str (str): Grid subdivision, e.g. "1/8", "1/16", "1/32".

    Returns:
        list: The escore with note onsets quantized (notes are modified in place).
    """
    print(f" - Quantizing notes to {quantize_level_str} at {bpm:.2f} BPM...")

    # Number of grid steps per quarter-note beat for each supported level.
    divisions_per_beat = {
        "1/4": 1.0,
        "1/8": 2.0,
        "1/12": 3.0,  # triplet feel: 3 notes per beat
        "1/16": 4.0,
        "1/24": 6.0,  # triplet feel: 6 notes per beat
        "1/32": 8.0,
        "1/64": 16.0,
    }
    steps_per_beat = divisions_per_beat.get(quantize_level_str)
    if not steps_per_beat:
        print(" - Invalid quantization level. Skipping.")
        return escore

    # One beat lasts 60000/bpm milliseconds; divide it into grid steps.
    grid_step_ms = (60000.0 / bpm) / steps_per_beat

    snapped = []
    snapped_count = 0
    for ev in escore:
        if isinstance(ev[0], (int, float)):
            # Round the onset to the closest multiple of the grid step.
            ev[0] = int(round(ev[0] / grid_step_ms) * grid_step_ms)
            snapped_count += 1
        snapped.append(ev)

    print(f" - Quantized {snapped_count} notes.")
    return snapped
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def filter_spurious_notes_escore(escore, max_dur_ms=50, max_vel=20):
    """
    Drop notes that are both very short AND very quiet (likely noise).

    A note is removed only when its duration (index 1) is below *max_dur_ms*
    and its velocity (index 3) is below *max_vel*; metadata events are kept.

    Args:
        escore (list): Mixed list of note events and metadata events.
        max_dur_ms (int): Duration ceiling (ms) for the spurious test.
        max_vel (int): Velocity ceiling for the spurious test.

    Returns:
        list: New escore, time-sorted, with spurious notes removed.
    """
    print(f" - Filtering spurious notes (duration < {max_dur_ms}ms AND velocity < {max_vel})...")

    # Partition the score in a single pass instead of two comprehensions.
    notes, metadata = [], []
    for ev in escore:
        (notes if isinstance(ev[0], (int, float)) else metadata).append(ev)

    # De Morgan form of "keep unless (too short AND too quiet)".
    survivors = [n for n in notes if n[1] >= max_dur_ms or n[3] >= max_vel]

    print(f" - Removed {len(notes) - len(survivors)} spurious notes.")

    # Merge back with metadata and restore chronological order; metadata
    # events carry their time at index 1, note events at index 0.
    merged = metadata + survivors
    merged.sort(key=lambda ev: ev[1] if isinstance(ev[0], str) else ev[0])
    return merged
|
| 315 |
+
|
| 316 |
+
def process_velocity_escore(escore, mode="None", smooth_factor=0.5, compress_min=30, compress_max=100):
    """
    Apply velocity smoothing or range compression to an escore's notes.

    "Smooth" blends each interior note's velocity (index 3) towards the
    average of its two neighbours; "Compress" linearly remaps the observed
    velocity range onto [compress_min, compress_max]. Results are always
    clamped to the valid MIDI velocity range 1..127.

    Args:
        escore (list): Mixed list of note events and metadata events.
        mode (str): "Smooth", "Compress", or "None" (no-op).
        smooth_factor (float): Blend weight, 0 = unchanged, 1 = pure neighbour average.
        compress_min (int): Lower bound of the compressed range.
        compress_max (int): Upper bound of the compressed range.

    Returns:
        list: Time-sorted escore with processed velocities (notes are
        modified in place).
    """
    if mode == "None":
        return escore

    print(f" - Processing velocities with mode: {mode}...")

    notes, metadata = [], []
    for ev in escore:
        (notes if isinstance(ev[0], (int, float)) else metadata).append(ev)

    if not notes:
        return escore

    vels = [n[3] for n in notes]

    if mode == "Smooth":
        updated = list(vels)
        # Endpoints keep their original velocity; only interior notes blend.
        # Neighbour values are read from the ORIGINAL list so the pass is
        # order-independent.
        for idx in range(1, len(vels) - 1):
            neighbour_mean = (vels[idx - 1] + vels[idx + 1]) / 2.0
            blended = (vels[idx] * (1 - smooth_factor)) + (neighbour_mean * smooth_factor)
            updated[idx] = int(max(1, min(127, blended)))
        for note, vel in zip(notes, updated):
            note[3] = vel
        print(f" - Smoothed {len(notes)} velocities.")

    elif mode == "Compress":
        lo, hi = min(vels), max(vels)
        # All velocities identical: nothing to remap, avoid divide-by-zero.
        if hi == lo:
            return escore
        for note in notes:
            # Linear mapping from the observed range onto the target range.
            remapped = compress_min + (note[3] - lo) * \
                       (compress_max - compress_min) / (hi - lo)
            note[3] = int(max(1, min(127, remapped)))
        print(f" - Compressed {len(notes)} velocities to range [{compress_min}, {compress_max}].")

    merged = metadata + notes
    merged.sort(key=lambda ev: ev[1] if isinstance(ev[0], str) else ev[0])
    return merged
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def stabilize_midi_rhythm(escore,
                          ioi_threshold_ratio=0.30,
                          min_ioi_ms=30,
                          enable_segmentation=True,
                          silence_split_threshold_s=2.0):
    """
    Removes or merges rhythmically unstable notes from an escore list.

    This is designed to clean up MIDI generated by basic-pitch with multiple pitch bends,
    which can create clusters of very short, dense notes to approximate a slide.
    This version can segment the MIDI based on silence before processing, making it robust
    for files containing multiple songs with different tempos (like an album).

    Args:
        escore (list): The list of events, which can include notes and metadata strings.
        ioi_threshold_ratio (float): The ratio of the median IOI below which a note is considered unstable.
        min_ioi_ms (int): An absolute minimum IOI in milliseconds.
        enable_segmentation (bool): If True, splits the notes into segments based on silence.
        silence_split_threshold_s (float): The duration of silence in seconds to define a new segment.

    Returns:
        list: The cleaned escore with unstable notes removed or merged, and metadata preserved.
    """
    # 1. Separate note events from metadata events based on the type of the first element.
    note_events = [note for note in escore if isinstance(note[0], (int, float))]
    metadata_events = [meta for meta in escore if not isinstance(meta[0], (int, float))]

    # Only proceed if there are enough notes to estimate a stable rhythm from.
    if len(note_events) < 20:
        print(" - Rhythm stabilization skipped: not enough notes to analyze.")
        return escore

    print(" - Running rhythm stabilization...")

    # Sorting by onset is critical: inter-onset intervals are only meaningful in time order.
    note_events.sort(key=lambda x: x[0])

    # 2. Segment the notes based on silence if enabled.
    segments = []
    if enable_segmentation and len(note_events) > 1:
        print(f" - Segmentation enabled (silence > {silence_split_threshold_s}s).")
        current_segment = [note_events[0]]
        silence_threshold_ms = silence_split_threshold_s * 1000

        for i in range(1, len(note_events)):
            # Gap is measured from the END of the previous note to the START of this one.
            prev_note_end_ms = note_events[i-1][0] + note_events[i-1][1]
            gap_ms = note_events[i][0] - prev_note_end_ms

            if gap_ms > silence_threshold_ms:
                if current_segment:
                    segments.append(current_segment)
                current_segment = []  # Start a new segment

            current_segment.append(note_events[i])

        if current_segment:
            segments.append(current_segment)  # Add the last segment
        print(f" - Split MIDI into {len(segments)} segment(s) for individual processing.")
    else:
        # If segmentation is disabled, treat the entire file as a single segment.
        segments = [note_events]

    # 3. Process each segment individually.
    all_cleaned_notes = []
    total_merged_count = 0

    for i, segment in enumerate(segments):
        if len(segment) < 20:  # Skip stabilization for very short segments
            all_cleaned_notes.extend(segment)
            continue

        # Inter-onset intervals within this segment only.
        iois = [segment[j][0] - segment[j-1][0] for j in range(1, len(segment))]
        # Chord notes share an onset, producing zero IOIs; exclude them from the median.
        positive_iois = [ioi for ioi in iois if ioi > 0]

        if not positive_iois:
            all_cleaned_notes.extend(segment)
            continue

        median_ioi = np.median(positive_iois)
        # Merge threshold: the greater of the ratio-based value or the absolute minimum.
        threshold_ms = max(median_ioi * ioi_threshold_ratio, min_ioi_ms)

        # Deep copies so merging never mutates the caller's note lists.
        cleaned_segment = [copy.deepcopy(segment[0])]
        notes_merged_in_segment = 0

        for j in range(1, len(segment)):
            current_note = segment[j]
            last_kept_note = cleaned_segment[-1]

            # IOI relative to the last *accepted* note, and pitch proximity to
            # avoid absorbing unrelated grace notes.
            actual_ioi = current_note[0] - last_kept_note[0]
            pitch_difference = abs(current_note[2] - last_kept_note[2])

            if actual_ioi < threshold_ms and pitch_difference < 5:
                notes_merged_in_segment += 1
                # BUGFIX: extend the kept note to the LATER of the two end times.
                # The old code always used the merged-in note's end, which could
                # SHORTEN the kept note when the short note ended earlier.
                last_end = last_kept_note[0] + last_kept_note[1]
                merged_end = max(last_end, current_note[0] + current_note[1])
                last_kept_note[1] = merged_end - last_kept_note[0]
            else:
                # Note is rhythmically stable, so we keep it.
                cleaned_segment.append(copy.deepcopy(current_note))

        if len(segments) > 1:
            print(f" - Segment {i+1}: Median IOI {median_ioi:.2f}ms, merged {notes_merged_in_segment} notes.")

        all_cleaned_notes.extend(cleaned_segment)
        total_merged_count += notes_merged_in_segment

    if total_merged_count > 0:
        print(f" - Rhythm stabilization complete. Total merged notes: {total_merged_count}.")

    # 4. Recombine metadata with the globally cleaned notes and re-sort.
    # The sort key must handle both event types: metadata time is at index 1,
    # note time is at index 0.
    final_escore = metadata_events + all_cleaned_notes
    final_escore.sort(key=lambda event: event[1] if isinstance(event[0], str) else event[0])

    return final_escore
|
| 503 |
+
|
| 504 |
+
|
| 505 |
def analyze_audio_for_adaptive_params(audio_data: np.ndarray, sample_rate: int):
|
| 506 |
"""
|
| 507 |
Analyzes raw audio data to dynamically determine optimal parameters for basic-pitch.
|
|
|
|
| 2140 |
o[1] *= 200
|
| 2141 |
o[2] *= 200
|
| 2142 |
|
| 2143 |
+
# --- MIDI Post-Processing & Correction Block ---
|
| 2144 |
+
if getattr(params, 'enable_midi_corrections', False):
|
| 2145 |
+
print("Applying MIDI Post-Processing & Corrections...")
|
| 2146 |
+
|
| 2147 |
+
# Filter spurious notes first to clean the data for other processes
|
| 2148 |
+
if getattr(params, 'correction_filter_spurious_notes', False):
|
| 2149 |
+
output_score = filter_spurious_notes_escore(
|
| 2150 |
+
output_score,
|
| 2151 |
+
max_dur_ms=getattr(params, 'correction_spurious_duration_ms', 50),
|
| 2152 |
+
max_vel=getattr(params, 'correction_spurious_velocity', 20)
|
| 2153 |
+
)
|
| 2154 |
+
|
| 2155 |
+
# Then, stabilize rhythm on the cleaned notes
|
| 2156 |
+
if getattr(params, 'correction_remove_abnormal_rhythm', False):
|
| 2157 |
+
output_score = stabilize_midi_rhythm(
|
| 2158 |
+
output_score,
|
| 2159 |
+
enable_segmentation=getattr(params, 'correction_rhythm_stab_by_segment', False),
|
| 2160 |
+
silence_split_threshold_s=getattr(params, 'correction_rhythm_stab_segment_silence_s', 1.0)
|
| 2161 |
+
)
|
| 2162 |
+
|
| 2163 |
+
# Then, quantize the stabilized rhythm
|
| 2164 |
+
quantize_level = getattr(params, 'correction_quantize_level', "None")
|
| 2165 |
+
if quantize_level != "None":
|
| 2166 |
+
try:
|
| 2167 |
+
# We need to get the BPM for quantization. We do this once here.
|
| 2168 |
+
midi_obj_for_bpm = pretty_midi.PrettyMIDI(input_midi_path)
|
| 2169 |
+
estimated_bpm = midi_obj_for_bpm.estimate_tempo()
|
| 2170 |
+
output_score = quantize_escore(output_score, estimated_bpm, quantize_level)
|
| 2171 |
+
except Exception as e:
|
| 2172 |
+
print(f" - Could not estimate BPM for quantization. Skipping. Error: {e}")
|
| 2173 |
+
|
| 2174 |
+
# Finally, process velocity as it doesn't affect timing or notes
|
| 2175 |
+
velocity_mode = getattr(params, 'correction_velocity_mode', "None")
|
| 2176 |
+
if velocity_mode != "None":
|
| 2177 |
+
output_score = process_velocity_escore(
|
| 2178 |
+
output_score,
|
| 2179 |
+
mode=velocity_mode,
|
| 2180 |
+
smooth_factor=getattr(params, 'correction_velocity_smooth_factor', 0.5),
|
| 2181 |
+
compress_min=getattr(params, 'correction_velocity_compress_min', 30),
|
| 2182 |
+
compress_max=getattr(params, 'correction_velocity_compress_max', 100)
|
| 2183 |
+
)
|
| 2184 |
+
print("Corrections finished.")
|
| 2185 |
+
print('=' * 70)
|
| 2186 |
+
|
| 2187 |
print('Final adjustments complete.')
|
| 2188 |
print('=' * 70)
|
| 2189 |
|
|
|
|
| 3810 |
updates[enable_advanced_separation] = gr.update(visible=is_demucs, value=False)
|
| 3811 |
return updates
|
| 3812 |
|
| 3813 |
+
# Event listener for the velocity processing mode dropdown
def update_velocity_options(mode):
    """Show only the velocity control(s) relevant to the selected mode."""
    return {
        correction_velocity_smooth_factor: gr.update(visible=(mode == "Smooth")),
        velocity_compress_sliders: gr.update(visible=(mode == "Compress")),
    }
|
| 3821 |
+
|
| 3822 |
# --- Use the dataclass to define the master list of parameter keys ---
|
| 3823 |
# This is now the single source of truth for parameter order.
|
| 3824 |
ALL_PARAM_KEYS = [field.name for field in fields(AppParameters) if field.name not in ["input_file", "batch_input_files"]]
|
|
|
|
| 4061 |
info="Quantizes the score to a fixed bar length. 'Start Times' aligns onsets. "
|
| 4062 |
"'Durations' trims notes at the bar line. 'Split Durations' splits notes that cross the bar line."
|
| 4063 |
)
|
| 4064 |
+
with gr.Accordion("EXPERIMENTAL: MIDI Post-Processing & Correction Tools", open=False):
|
| 4065 |
+
enable_midi_corrections = gr.Checkbox(
|
| 4066 |
+
label="Enable MIDI Correction Suite",
|
| 4067 |
+
value=False,
|
| 4068 |
+
info="Master switch for all post-processing tools below. Use these to clean up and refine the transcribed MIDI before rendering."
|
| 4069 |
+
)
|
| 4070 |
+
with gr.Group(visible=False) as midi_correction_settings:
|
| 4071 |
+
# --- Spurious Note Filtering Group ---
|
| 4072 |
+
with gr.Group():
|
| 4073 |
+
correction_filter_spurious_notes = gr.Checkbox(
|
| 4074 |
+
label="Filter Spurious (Noise) Notes",
|
| 4075 |
+
value=True,
|
| 4076 |
+
info="Removes very short, quiet notes that are likely transcription errors from background noise."
|
| 4077 |
+
)
|
| 4078 |
+
with gr.Row():
|
| 4079 |
+
correction_spurious_duration_ms = gr.Slider(
|
| 4080 |
+
10, 200, value=50, step=5,
|
| 4081 |
+
label="Max Duration (ms)",
|
| 4082 |
+
info="Notes shorter than this duration..."
|
| 4083 |
+
)
|
| 4084 |
+
correction_spurious_velocity = gr.Slider(
|
| 4085 |
+
1, 50, value=20, step=1,
|
| 4086 |
+
label="Max Velocity",
|
| 4087 |
+
info="...and quieter than this velocity will be removed."
|
| 4088 |
+
)
|
| 4089 |
+
# --- stabilize rhythm on the cleaned notes ---
|
| 4090 |
+
with gr.Group():
|
| 4091 |
+
correction_remove_abnormal_rhythm = gr.Checkbox(label="Stabilize Rhythm (for Pitch Bend)", value=False,
|
| 4092 |
+
info="Attempts to merge overly dense, rhythmically unstable notes often created when 'Allow Multiple Pitch Bends' is used. This can clean up the rhythm but may lose some pitch slide nuance.")
|
| 4093 |
+
with gr.Group(visible=False) as rhythm_stab_options: # This group is initially hidden
|
| 4094 |
+
correction_rhythm_stab_by_segment = gr.Checkbox(label="Enable Segmentation by Silence", value=False,
|
| 4095 |
+
info="Highly recommended for albums or long files. Splits the MIDI by silent parts before stabilizing rhythm, ensuring accuracy for songs with different tempos.")
|
| 4096 |
+
correction_rhythm_stab_segment_silence_s = gr.Slider(minimum=0.5, maximum=10.0, value=1.0, step=0.5,
|
| 4097 |
+
label="Silence Threshold for Segmentation (seconds)",
|
| 4098 |
+
info="The amount of silence required to start a new segment. 1-3 seconds is usually enough to separate songs on an album.")
|
| 4099 |
+
# --- Quantization Group ---
|
| 4100 |
+
with gr.Group():
|
| 4101 |
+
correction_quantize_level = gr.Dropdown(
|
| 4102 |
+
["None", "1/64", "1/32", "1/16", "1/8", "1/4", "1/24", "1/12"],
|
| 4103 |
+
value="None",
|
| 4104 |
+
label="Quantize Rhythm",
|
| 4105 |
+
info="Quantizes notes to the nearest rhythmic grid line. '1/16' is recommended for most pop and rock music. For expressive genres like classical or jazz, use with caution as it may reduce natural timing nuances. Straight divisions (1/8, 1/16, etc.) suit most modern music, while swing divisions (1/12, 1/24) are ideal for jazz, blues, or shuffle styles."
|
| 4106 |
+
)
|
| 4107 |
+
# --- Velocity Processing Group ---
|
| 4108 |
+
with gr.Group():
|
| 4109 |
+
correction_velocity_mode = gr.Dropdown(
|
| 4110 |
+
["None", "Smooth", "Compress"],
|
| 4111 |
+
value="None",
|
| 4112 |
+
label="Process Velocity",
|
| 4113 |
+
info="'Smooth' reduces sudden jumps in volume. 'Compress' scales all velocities into a specific range."
|
| 4114 |
+
)
|
| 4115 |
+
with gr.Group() as velocity_options_group: # This group will have its visibility toggled
|
| 4116 |
+
correction_velocity_smooth_factor = gr.Slider(
|
| 4117 |
+
0.0, 1.0, value=0.5, step=0.05,
|
| 4118 |
+
label="Smoothing Factor",
|
| 4119 |
+
info="Controls the amount of smoothing. 0 = no change, 1 = full averaging with neighbors.",
|
| 4120 |
+
visible=False # Initially hidden
|
| 4121 |
+
)
|
| 4122 |
+
with gr.Row(visible=False) as velocity_compress_sliders: # Initially hidden
|
| 4123 |
+
correction_velocity_compress_min = gr.Slider(1, 127, value=30, step=1, label="Target Min Velocity")
|
| 4124 |
+
correction_velocity_compress_max = gr.Slider(1, 127, value=100, step=1, label="Target Max Velocity")
|
| 4125 |
|
| 4126 |
with gr.Column(scale=1):
|
| 4127 |
# --- 8-bit Synthesizer Settings ---
|
|
|
|
| 4530 |
s8bit_ui_keys = [key for key in ALL_PARAM_KEYS if key.startswith('s8bit_')]
|
| 4531 |
s8bit_ui_components = [ui_component_map[key] for key in s8bit_ui_keys]
|
| 4532 |
|
| 4533 |
+
# Create a separate list containing only the 13 controls to be updated
|
| 4534 |
s8bit_control_components = [comp for comp in s8bit_ui_components if comp != s8bit_preset_selector]
|
| 4535 |
|
| 4536 |
# The list of basic_pitch UI components that can be updated by its preset selector.
|
|
|
|
| 4672 |
inputs=s8bit_enable_delay,
|
| 4673 |
outputs=delay_settings_box
|
| 4674 |
)
|
| 4675 |
+
# Event listener to show/hide the main correction settings group
|
| 4676 |
+
enable_midi_corrections.change(
|
| 4677 |
+
fn=lambda x: gr.update(visible=x),
|
| 4678 |
+
inputs=enable_midi_corrections,
|
| 4679 |
+
outputs=midi_correction_settings
|
| 4680 |
+
)
|
| 4681 |
+
# Event listener to show/hide the rhythm stabilization sub-options
|
| 4682 |
+
correction_remove_abnormal_rhythm.change(
|
| 4683 |
+
fn=lambda x: gr.update(visible=x),
|
| 4684 |
+
inputs=correction_remove_abnormal_rhythm,
|
| 4685 |
+
outputs=rhythm_stab_options
|
| 4686 |
+
)
|
| 4687 |
+
correction_velocity_mode.change(
|
| 4688 |
+
fn=update_velocity_options,
|
| 4689 |
+
inputs=correction_velocity_mode,
|
| 4690 |
+
outputs=[correction_velocity_smooth_factor, velocity_compress_sliders]
|
| 4691 |
+
)
|
| 4692 |
|
| 4693 |
# Launch the Gradio app
|
| 4694 |
app.queue().launch(inbrowser=True, debug=True)
|