Module pysimt.metrics.simnmt
Simultaneous MT latency metrics.
Expand source code
"""Simultaneous MT latency metrics."""
import numpy as np
from .metric import Metric
class AVPScorer:
    """Average Proportion latency metric (Cho and Esipova, 2016).

    Also serves as the base class of the other latency scorers in this
    module, which override `compute_delays()` only.
    """

    def __init__(self, add_trg_eos=True):
        self.add_trg_eos = add_trg_eos
        self.name = 'AVP'

    def compute(self, actions):
        """Score a batch of action sequences.

        `actions` is a list of strings; each string is a space-separated
        sequence of 0s/1s or R/Ws denoting READs and WRITEs, respectively.
        The per-sequence delays are averaged and wrapped in a `Metric`
        for which lower values are better.
        """
        np_actions = self.__process_actions(actions)
        per_sequence = self.compute_delays(np_actions)
        return Metric(self.name, per_sequence.mean(), higher_better=False)

    def __process_actions(self, actions):
        """Turn every action string into an integer numpy array of 0s/1s."""
        # R -> 0, W -> 1; '3' could be used as padding, so it is dropped
        table = {ord('R'): '0', ord('W'): '1', ord('3'): None}
        # Optional trailing WRITE appended to every sequence
        eos = ['1'] if self.add_trg_eos else []

        parsed = []
        for line in actions:
            tokens = line.strip().translate(table).strip().split()
            parsed.append(np.array(tokens + eos, dtype='int'))
        return parsed

    def compute_delays(self, np_actions):
        """Return an array holding one delay value per action sequence."""

        def proportion(seq):
            n_writes = seq.sum()
            n_reads = seq.size - n_writes
            # Number of READs consumed so far, kept only at WRITE positions
            reads_at_writes = np.cumsum(1 - seq) * seq
            return np.sum(reads_at_writes) / (n_reads * n_writes)

        return np.array([proportion(seq) for seq in np_actions])

    def compute_from_file(self, fname):
        """Score the sequences stored in `fname`.

        `fname` is a text file where each line is a space-separated
        sequence of either 0/1s or R/Ws.
        """
        with open(fname) as f:
            actions = [line.strip() for line in f]
        return self.compute(actions)
class CWMScorer(AVPScorer):
    """Mean consecutive wait metric (Gu et al., 2017).

    Parsing and final averaging are inherited from `AVPScorer`; only the
    per-sequence score is redefined here.
    """

    def __init__(self, add_trg_eos=True):
        self.add_trg_eos = add_trg_eos
        self.name = 'CWM'

    def compute_sequence_cw(self, actions):
        """Return the positive consecutive-wait lengths of one 0/1 array."""
        # Cumulative READ count, kept only at WRITE positions
        reads_at_writes = np.cumsum(1 - actions) * actions
        # Discard READ positions and WRITEs that precede any READ
        reads_at_writes = reads_at_writes[reads_at_writes > 0]
        # READs consumed since the previous kept WRITE (first one counts from 0)
        waits = np.diff(reads_at_writes, prepend=0)
        return waits[waits > 0]

    def compute_delays(self, np_actions):
        """Compute the average consecutive wait of every sequence."""
        means = []
        for seq in np_actions:
            means.append(self.compute_sequence_cw(seq).mean())
        return np.array(means)
class CWXScorer(CWMScorer):
    """Average maximum consecutive wait metric (Gu et al., 2017)."""

    def __init__(self, add_trg_eos=True):
        self.add_trg_eos = add_trg_eos
        self.name = 'CWX'

    def compute_delays(self, np_actions):
        """Compute the largest consecutive wait of every sequence."""
        maxima = []
        for seq in np_actions:
            # compute_sequence_cw() is inherited from CWMScorer
            maxima.append(self.compute_sequence_cw(seq).max())
        return np.array(maxima)
class AVLScorer(AVPScorer):
    """Average Lagging metric (Ma et al., 2019)"""

    def __init__(self, add_trg_eos=True):
        self.name = 'AVL'
        self.add_trg_eos = add_trg_eos

    def compute_delays(self, np_actions):
        """Compute lag per sequence.

        `np_actions` is an iterable of numpy arrays of 0s (READ) and
        1s (WRITE). Returns a numpy array with one lag value per sequence.
        Every sequence must contain at least one READ, otherwise locating
        the last READ raises `IndexError`.
        """
        lags = []
        for acts in np_actions:
            len_y = acts.sum()
            len_x = acts.size - len_y
            ratio = len_y / len_x

            # cutoff point where reading ends: the number of WRITEs up to and
            # including the first WRITE that follows the final READ
            last_read = np.flatnonzero(acts == 0)[-1]
            cutoff = acts[:last_read + 2].sum()

            # 2nd term in eq. 8
            t2 = np.arange(cutoff) / ratio

            # READs consumed before each WRITE. The WRITE positions are picked
            # with a mask instead of filtering on `> 0`, so that WRITEs issued
            # before the first READ (zero reads) keep their slot and every
            # entry stays aligned with the matching entry of `t2`.
            reads_at_writes = (1 - acts).cumsum()[acts == 1]

            lags.append((reads_at_writes[:cutoff] - t2).mean())
        return np.array(lags)
Classes
class AVLScorer (add_trg_eos=True)
-
Average Lagging metric (Ma et al., 2019)
Expand source code
class AVLScorer(AVPScorer):
    """Average Lagging metric (Ma et al., 2019)"""

    def __init__(self, add_trg_eos=True):
        self.name = 'AVL'
        self.add_trg_eos = add_trg_eos

    def compute_delays(self, np_actions):
        """Compute lag per sequence.

        `np_actions` is an iterable of numpy arrays of 0s (READ) and
        1s (WRITE). Returns a numpy array with one lag value per sequence.
        Every sequence must contain at least one READ, otherwise locating
        the last READ raises `IndexError`.
        """
        lags = []
        for acts in np_actions:
            len_y = acts.sum()
            len_x = acts.size - len_y
            ratio = len_y / len_x

            # cutoff point where reading ends: the number of WRITEs up to and
            # including the first WRITE that follows the final READ
            last_read = np.flatnonzero(acts == 0)[-1]
            cutoff = acts[:last_read + 2].sum()

            # 2nd term in eq. 8
            t2 = np.arange(cutoff) / ratio

            # READs consumed before each WRITE. The WRITE positions are picked
            # with a mask instead of filtering on `> 0`, so that WRITEs issued
            # before the first READ (zero reads) keep their slot and every
            # entry stays aligned with the matching entry of `t2`.
            reads_at_writes = (1 - acts).cumsum()[acts == 1]

            lags.append((reads_at_writes[:cutoff] - t2).mean())
        return np.array(lags)
Ancestors
Methods
def compute_delays(self, np_actions)
-
Compute lag per sequence.
Expand source code
def compute_delays(self, np_actions):
    """Compute lag per sequence.

    `np_actions` is an iterable of numpy arrays of 0s (READ) and
    1s (WRITE). Returns a numpy array with one lag value per sequence.
    Every sequence must contain at least one READ, otherwise locating
    the last READ raises `IndexError`.
    """
    lags = []
    for acts in np_actions:
        len_y = acts.sum()
        len_x = acts.size - len_y
        ratio = len_y / len_x

        # cutoff point where reading ends: the number of WRITEs up to and
        # including the first WRITE that follows the final READ
        last_read = np.flatnonzero(acts == 0)[-1]
        cutoff = acts[:last_read + 2].sum()

        # 2nd term in eq. 8
        t2 = np.arange(cutoff) / ratio

        # READs consumed before each WRITE. The WRITE positions are picked
        # with a mask instead of filtering on `> 0`, so that WRITEs issued
        # before the first READ (zero reads) keep their slot and every
        # entry stays aligned with the matching entry of `t2`.
        reads_at_writes = (1 - acts).cumsum()[acts == 1]

        lags.append((reads_at_writes[:cutoff] - t2).mean())
    return np.array(lags)
Inherited members
class AVPScorer (add_trg_eos=True)
-
Average Proportion metric (Cho and Esipova, 2016)
Expand source code
class AVPScorer:
    """Average Proportion latency metric (Cho and Esipova, 2016).

    Also serves as the base class of the other latency scorers in this
    module, which override `compute_delays()` only.
    """

    def __init__(self, add_trg_eos=True):
        self.add_trg_eos = add_trg_eos
        self.name = 'AVP'

    def compute(self, actions):
        """Score a batch of action sequences.

        `actions` is a list of strings; each string is a space-separated
        sequence of 0s/1s or R/Ws denoting READs and WRITEs, respectively.
        The per-sequence delays are averaged and wrapped in a `Metric`
        for which lower values are better.
        """
        np_actions = self.__process_actions(actions)
        per_sequence = self.compute_delays(np_actions)
        return Metric(self.name, per_sequence.mean(), higher_better=False)

    def __process_actions(self, actions):
        """Turn every action string into an integer numpy array of 0s/1s."""
        # R -> 0, W -> 1; '3' could be used as padding, so it is dropped
        table = {ord('R'): '0', ord('W'): '1', ord('3'): None}
        # Optional trailing WRITE appended to every sequence
        eos = ['1'] if self.add_trg_eos else []

        parsed = []
        for line in actions:
            tokens = line.strip().translate(table).strip().split()
            parsed.append(np.array(tokens + eos, dtype='int'))
        return parsed

    def compute_delays(self, np_actions):
        """Return an array holding one delay value per action sequence."""

        def proportion(seq):
            n_writes = seq.sum()
            n_reads = seq.size - n_writes
            # Number of READs consumed so far, kept only at WRITE positions
            reads_at_writes = np.cumsum(1 - seq) * seq
            return np.sum(reads_at_writes) / (n_reads * n_writes)

        return np.array([proportion(seq) for seq in np_actions])

    def compute_from_file(self, fname):
        """Score the sequences stored in `fname`.

        `fname` is a text file where each line is a space-separated
        sequence of either 0/1s or R/Ws.
        """
        with open(fname) as f:
            actions = [line.strip() for line in f]
        return self.compute(actions)
Subclasses
Methods
def compute(self, actions)
-
actions
is a list of strings where each string is a space-separated sequence of 0s/1s or R/Ws for READs and WRITEs, respectively.
Expand source code
def compute(self, actions):
    """Score a batch of action sequences.

    `actions` is a list of strings; each string is a space-separated
    sequence of 0s/1s or R/Ws denoting READs and WRITEs, respectively.
    The per-sequence delays are averaged and wrapped in a `Metric`
    for which lower values are better.
    """
    np_actions = self.__process_actions(actions)
    per_sequence = self.compute_delays(np_actions)
    return Metric(self.name, per_sequence.mean(), higher_better=False)
def compute_delays(self, np_actions)
-
Compute the delay of each sequence and return the per-sequence values as an array (the averaging across sequences is done by compute).
Expand source code
def compute_delays(self, np_actions):
    """Return an array holding one delay value per action sequence."""

    def proportion(seq):
        n_writes = seq.sum()
        n_reads = seq.size - n_writes
        # Number of READs consumed so far, kept only at WRITE positions
        reads_at_writes = np.cumsum(1 - seq) * seq
        return np.sum(reads_at_writes) / (n_reads * n_writes)

    return np.array([proportion(seq) for seq in np_actions])
def compute_from_file(self, fname)
-
fname
is a text file where each line is a space separated sequence with either 0/1s or R/Ws.
Expand source code
def compute_from_file(self, fname):
    """Score the sequences stored in `fname`.

    `fname` is a text file where each line is a space-separated
    sequence of either 0/1s or R/Ws.
    """
    with open(fname) as f:
        actions = [line.strip() for line in f]
    return self.compute(actions)
class CWMScorer (add_trg_eos=True)
-
Mean consecutive wait metric (Gu et al., 2017)
Expand source code
class CWMScorer(AVPScorer):
    """Mean consecutive wait metric (Gu et al., 2017).

    Parsing and final averaging are inherited from `AVPScorer`; only the
    per-sequence score is redefined here.
    """

    def __init__(self, add_trg_eos=True):
        self.add_trg_eos = add_trg_eos
        self.name = 'CWM'

    def compute_sequence_cw(self, actions):
        """Return the positive consecutive-wait lengths of one 0/1 array."""
        # Cumulative READ count, kept only at WRITE positions
        reads_at_writes = np.cumsum(1 - actions) * actions
        # Discard READ positions and WRITEs that precede any READ
        reads_at_writes = reads_at_writes[reads_at_writes > 0]
        # READs consumed since the previous kept WRITE (first one counts from 0)
        waits = np.diff(reads_at_writes, prepend=0)
        return waits[waits > 0]

    def compute_delays(self, np_actions):
        """Compute the average consecutive wait of every sequence."""
        means = []
        for seq in np_actions:
            means.append(self.compute_sequence_cw(seq).mean())
        return np.array(means)
Ancestors
Subclasses
Methods
def compute_delays(self, np_actions)
-
Compute average CW per sequence.
Expand source code
def compute_delays(self, np_actions):
    """Compute the average consecutive wait of every sequence."""
    means = []
    for seq in np_actions:
        means.append(self.compute_sequence_cw(seq).mean())
    return np.array(means)
def compute_sequence_cw(self, actions)
-
Expand source code
def compute_sequence_cw(self, actions):
    """Return the positive consecutive-wait lengths of one 0/1 array."""
    # Cumulative READ count, kept only at WRITE positions
    reads_at_writes = np.cumsum(1 - actions) * actions
    # Discard READ positions and WRITEs that precede any READ
    reads_at_writes = reads_at_writes[reads_at_writes > 0]
    # READs consumed since the previous kept WRITE (first one counts from 0)
    waits = np.diff(reads_at_writes, prepend=0)
    return waits[waits > 0]
Inherited members
class CWXScorer (add_trg_eos=True)
-
Average maximum consecutive wait metric (Gu et al., 2017)
Expand source code
class CWXScorer(CWMScorer):
    """Average maximum consecutive wait metric (Gu et al., 2017)."""

    def __init__(self, add_trg_eos=True):
        self.add_trg_eos = add_trg_eos
        self.name = 'CWX'

    def compute_delays(self, np_actions):
        """Compute the largest consecutive wait of every sequence."""
        maxima = []
        for seq in np_actions:
            # compute_sequence_cw() is inherited from CWMScorer
            maxima.append(self.compute_sequence_cw(seq).max())
        return np.array(maxima)
Ancestors
Methods
def compute_delays(self, np_actions)
-
Compute average of max CW per sequence.
Expand source code
def compute_delays(self, np_actions):
    """Compute the largest consecutive wait of every sequence."""
    maxima = []
    for seq in np_actions:
        maxima.append(self.compute_sequence_cw(seq).max())
    return np.array(maxima)
Inherited members