Files
i6_setups/hsmm/hsmm_inference.py
2026-01-21 16:30:17 +01:00

71 lines
2.6 KiB
Python

import torch
import torch.nn.functional as F
def viterbi_decode(model, x):
    """
    Returns the optimal sequence of states (path).

    Segmental (explicit-duration) Viterbi decoding: every segment is a state
    held for ``d`` consecutive frames (1 <= d <= ``model.max_dur``), scored as
    entry score + duration log-prob + summed emission log-probs of its frames.

    Args:
        model: Object providing ``n_states``, ``max_dur``, ``pi_logits``,
            ``dur_logits``, ``get_masked_transitions()`` and
            ``compute_emission_log_probs(x)``. From the way they are used
            here, the emission log-probs are expected to be (T, N), the
            transition logits (N, N) indexed [previous_state, next_state],
            the duration logits (N, max_dur) and the initial-state logits
            (N,) -- NOTE(review): confirm against the model class.
        x: Observation tensor whose first dimension is time (length T).

    Returns:
        list[int]: One state index per frame, in time order. Empty when
        T == 0. If the state being traced has no backpointer (nothing
        reachable), backtracking stops early and the list is shorter than T.
    """
    with torch.no_grad():
        T = x.shape[0]
        if T == 0:
            # Nothing to decode; max_prob[T - 1] below would raise on an
            # empty table.
            return []
        N = model.n_states
        D_max = model.max_dur

        # 1. Setup Probs
        log_emit = model.compute_emission_log_probs(x)
        log_trans = F.log_softmax(model.get_masked_transitions(), dim=1)
        log_dur = F.log_softmax(model.dur_logits, dim=1)
        log_pi = F.log_softmax(model.pi_logits, dim=0)

        # 2. Viterbi Tables
        # max_prob[t, s] = Best log-prob of a segmentation whose last segment
        # is in state s and ends exactly at frame t.
        max_prob = torch.full((T, N), -float('inf'), device=x.device)
        # back_state[t, s] = state of the preceding segment (-1 is Start).
        back_state = torch.full((T, N), -1, dtype=torch.long, device=x.device)
        # back_dur[t, s] = duration of the winning segment; 0 means that no
        # segment ending at (t, s) was ever recorded.
        back_dur = torch.zeros((T, N), dtype=torch.long, device=x.device)

        # 3. Dynamic Programming
        for t in range(T):
            # Emission sum of frames [t-d+1, t], grown one frame per step
            # instead of re-summing a slice for every d. Plain accumulation
            # (rather than a cumsum difference) stays correct when emission
            # log-probs contain -inf.
            seg_emit = torch.zeros_like(log_emit[t])
            # A segment ending at t cannot be longer than t + 1 frames.
            for d in range(1, min(D_max, t + 1) + 1):
                start = t - d + 1
                seg_emit = seg_emit + log_emit[start]
                dur_prob = log_dur[:, d - 1]

                if start == 0:
                    # Init: the segment opens the sequence.
                    best_prev_idx = None
                    cand = log_pi + dur_prob + seg_emit
                else:
                    # Transition: best predecessor for each target state.
                    # (N, 1) + (N, N) -> (N, N), rows = previous state.
                    trans_scores = max_prob[start - 1].unsqueeze(1) + log_trans
                    best_prev_score, best_prev_idx = trans_scores.max(dim=0)
                    cand = best_prev_score + dur_prob + seg_emit

                # Strict '>' keeps the shortest duration on ties, because d
                # is visited in increasing order.
                current = max_prob[t]
                better = cand > current
                max_prob[t] = torch.where(
                    better, cand.to(current.dtype), current
                )
                back_dur[t].masked_fill_(better, d)
                if best_prev_idx is None:
                    back_state[t].masked_fill_(better, -1)
                else:
                    back_state[t] = torch.where(
                        better, best_prev_idx, back_state[t]
                    )

        # 4. Backtracking
        best_end_state = torch.argmax(max_prob[T - 1]).item()
        # Pull the tables to plain Python lists once so the walk below does
        # not trigger a tensor -> host read per step.
        prev_states = back_state.tolist()
        durations = back_dur.tolist()

        segments = []
        curr_t = T - 1
        curr_s = best_end_state
        while curr_t >= 0:
            d = durations[curr_t][curr_s]
            if d == 0:
                # No recorded segment ends here; return what was recovered.
                break
            prev_s = prev_states[curr_t][curr_s]
            segments.append((curr_s, d))
            curr_t -= d
            curr_s = prev_s

        # Segments were collected last-to-first; emit each state d times in
        # time order.
        path = []
        for state, d in reversed(segments):
            path.extend([state] * d)
        return path