import torch
import nltk
from nltk.translate.bleu_score import SmoothingFunction
from tqdm import tqdm


def calculate_perplexity(model, tokens, prompt_len, bsz=1, marker=False):
    """
    Calculate perplexity of given tokens using the provided model, ignoring padding tokens.

    Args:
        model: Llama model
        tokens (List[List[int]] or torch.Tensor): Input tokens (n_prompts * n_drafts, seqlen)
        prompt_len (int): Prefix length
        bsz (int): Batch size
        marker (bool): Whether to show a progress bar

    Returns:
        Perplexity of each generation, shape (n_prompts * n_drafts,)
    """
    it = range(0, len(tokens), bsz)
    if marker:
        it = tqdm(it)
    ppl = torch.zeros(len(tokens))
    for start in it:
        end = start + bsz
        data = tokens[start:end]
        if not isinstance(data, list):
            data = data.tolist()
        # Remove any padding tokens (-1) in the generations
        for d_idx in range(len(data)):
            cur = data[d_idx]
            if -1 in cur:
                data[d_idx] = cur[:cur.index(-1)]
        # Calculate cross-entropy loss on the tokens
        ce_loss = model.generate(data, max_gen_len=0, temperature=-1, top_p=-1, grade=True)
        # Keep only the losses past `prompt_len`, dropping the prompt itself
        ce_loss = ce_loss[:, prompt_len - 1:]  # Subtract 1 because the first (start) token is removed
        # Calculate perplexity over the non-padding positions
        lengths = (ce_loss != 0).sum(dim=-1)
        mean = ce_loss.sum(dim=-1) / lengths
        ppl[start:end] = torch.exp(-1 * mean)
    return ppl
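

# Hedged usage sketch (not part of the original module): `calculate_perplexity`
# expects a model whose `generate(..., grade=True)` call returns a
# (batch, seqlen - 1) tensor of per-token scores. `_StubScorer` below is a
# hypothetical stand-in for that interface, used only to illustrate the
# expected shapes and the -1 padding convention.
if __name__ == "__main__":
    class _StubScorer:
        def generate(self, data, max_gen_len, temperature, top_p, grade):
            # Pretend every token after the first receives the same constant score.
            seqlen = max(len(d) for d in data)
            return torch.full((len(data), seqlen - 1), -2.0)

    toy_tokens = [[1, 5, 6, 7, 8, -1, -1], [1, 2, 3, 4, 5, 6, 7]]
    print("perplexity:", calculate_perplexity(_StubScorer(), toy_tokens, prompt_len=2, bsz=2))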


def calculate_diversity(generations, k=4):
    """
    Calculate diversity of generations using SELF-BLEU.

    Args:
        generations (List[List[List[int]]]): Tokenized input
        k (int, Optional): Maximum n-gram order to use for BLEU

    Returns:
        Average SELF-BLEU across all generations (float)
    """
    nltk.download('punkt')  # Can be deleted once downloaded
    smooth = SmoothingFunction()
    bleus = []
    for drafts in generations:
        tokenized_drafts = []
        # Stringify tokens, dropping padding (-1)
        for d in drafts:
            if -1 in d:
                d = d[:d.index(-1)]
            tokenized_drafts.append([str(n) for n in d])
        # Calculate SELF-BLEU with uniform weights over n-gram orders up to min(shortest draft, k)
        minlength = min(len(g) for g in tokenized_drafts)
        minlength = min(minlength, k)
        weights = tuple(1.0 / minlength for _ in range(minlength))
        for i in range(len(drafts)):
            # Create source and references (all other drafts)
            src = tokenized_drafts[i]
            ref = tokenized_drafts[:i] + tokenized_drafts[i + 1:]
            tmp = nltk.translate.bleu_score.sentence_bleu(references=ref,
                                                          hypothesis=src,
                                                          weights=weights,
                                                          smoothing_function=smooth.method1)
            bleus.append(tmp)
    bleus = torch.Tensor(bleus)
    return torch.mean(bleus).item()
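

# Hedged example (toy token IDs, not real model output): when the drafts for a
# prompt are identical, SELF-BLEU is ~1 (low diversity); varied drafts score
# lower. Requires `nltk` (the function downloads 'punkt' on first call).
if __name__ == "__main__":
    toy_generations = [
        [[5, 6, 7, 8, -1], [5, 6, 7, 8, -1], [5, 6, 7, 8, -1]],  # identical drafts
        [[1, 2, 3, 4, 5], [9, 8, 7, 6, 5], [1, 9, 2, 8, 3]],     # varied drafts
    ]
    print("SELF-BLEU:", calculate_diversity(toy_generations))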


def calculate_ngram_repetition(sequences):
    """
    Calculate uniqueness scores of `sequences`.

    Args:
        sequences (List[List[int]]): Generated sequences

    Returns:
        (unigram_uniqueness, bigram_uniqueness, trigram_uniqueness)
    """
    u_total = 0
    b_total = 0
    t_total = 0
    # Iterate through all sequences indiscriminately
    for gen in sequences:
        if -1 in gen:
            gen = gen[:gen.index(-1)]
        unigrams, bigrams, trigrams = [], [], []
        o = [str(i) for i in gen]
        # Create lists of n-grams for the generation
        for i in range(len(o)):
            unigrams.append(o[i])
        for i in range(len(o) - 1):
            bigrams.append(o[i] + '_' + o[i + 1])
        for i in range(len(o) - 2):
            trigrams.append(o[i] + '_' + o[i + 1] + '_' + o[i + 2])
        # Calculate uniqueness of the generation
        u = len(set(unigrams)) / len(unigrams)
        b = len(set(bigrams)) / len(bigrams)
        t = len(set(trigrams)) / len(trigrams)
        u_total += u
        b_total += b
        t_total += t
    return u_total / len(sequences), b_total / len(sequences), t_total / len(sequences)
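

# Hedged example (toy sequences, not real generations): the repetitive sequence
# drags down the average uniqueness scores relative to the distinct one, and
# padding (-1) is stripped before counting n-grams.
if __name__ == "__main__":
    toy_sequences = [
        [1, 2, 3, 1, 2, 3, 1, 2, 3],     # heavily repeated n-grams
        [4, 5, 6, 7, 8, 9, 10, -1, -1],  # all n-grams unique (padded)
    ]
    print("(unigram, bigram, trigram) uniqueness:",
          calculate_ngram_repetition(toy_sequences))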