Spaces:

maomao88
/

visualize_attention_for_translation_en_zh

Running

App Files Community

maomao88 committed on Apr 15

Commit

93559de

1 Parent(s): 77a9363

add encoder self-attention

Browse files

Files changed (3) hide show

__pycache__/utils.cpython-313.pyc +0 -0
app.py +105 -16
utils.py +5 -9

__pycache__/utils.cpython-313.pyc CHANGED Viewed

Binary files a/__pycache__/utils.cpython-313.pyc and b/__pycache__/utils.cpython-313.pyc differ

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import torch
 from torch import nn
 import gradio as gr
-from utils import save_data, get_attn_list, get_top_attns
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
@@ -37,8 +37,11 @@ def translate_text(input_text):
     avg_decoder_attn_list = get_attn_list(translated.decoder_attentions, layer_index)
     decoder_attn_scores = get_top_attns(avg_decoder_attn_list)
     # save_data(outputs, src_tokens, tgt_tokens, attn_scores)
-    return outputs, render_cross_attn_html(src_tokens, tgt_tokens), cross_attn_scores, render_encoder_decoder_attn_html(tgt_tokens, "Output"), decoder_attn_scores
 def render_cross_attn_html(src_tokens, tgt_tokens):
@@ -64,13 +67,17 @@ def render_cross_attn_html(src_tokens, tgt_tokens):
 def render_encoder_decoder_attn_html(tokens, type):
     # Build HTML for source and target tokens
     tokens_html = ""
     for i, token in enumerate(tokens):
-        tokens_html += f'<span class="token decoder-token" data-index="{i}">{token}</span> '
     html = f"""
         <div class="tgt-token-wrapper-text">{type} Tokens</div>
         <div class="tgt-token-wrapper">{tokens_html}</div>
-        <div class="scores"><span class="score-1 decoder-score"></span><span class="score-2 decoder-score"></span><span class="score-3 decoder-score"></span><div>
         """
     return html
@@ -80,7 +87,7 @@ css = """
 .output-html {padding-top: 1rem; padding-bottom: 1rem;}
 .output-html-row {margin-bottom: .5rem; border: var(--block-border-width) solid var(--block-border-color); border-radius: var(--block-radius);}
 .token {padding: .5rem; border-radius: 5px;}
-.tgt-token {cursor: pointer;}
 .tgt-token-wrapper {line-height: 2.5rem; padding: .5rem;}
 .src-token-wrapper {line-height: 2.5rem; padding: .5rem;}
 .src-token-wrapper-text {position: absolute; bottom: .75rem; color: #71717a;}
@@ -94,18 +101,21 @@ css = """
 """
 js = """
-function showCrossAttFun(attn_scores, decoder_attn) {
     const scrTokens = document.querySelectorAll('.src-token');
     const srcLen = scrTokens.length - 1
     const targetTokens = document.querySelectorAll('.tgt-token');
     const scores = document.querySelectorAll('.score');
     const decoderTokens = document.querySelectorAll('.decoder-token');
     const decLen = decoderTokens.length - 1
     const decoderScores = document.querySelectorAll('.decoder-score');
     function onTgtHover(event, idx) {
         event.style.backgroundColor = "#C6E6E6";
@@ -153,9 +163,7 @@ function showCrossAttFun(attn_scores, decoder_attn) {
         scores[2].style.display = "none";
     }
-    function onSelfHover(event, idx) {
-        event.style.backgroundColor = "#C6E6E6";
         idx0 = decoder_attn[idx]['top_index'][0]
         if (idx0 < decLen) {
             el0 = decoderTokens[idx0]
@@ -181,12 +189,12 @@ function showCrossAttFun(attn_scores, decoder_attn) {
         }
         for (i=idx+1; i < decoderTokens.length; i++) {
-            decoderTokens[i].style.color = "#aaa8a8";
         }
     }
-    function outSelfHover(event, idx) {
         event.style.backgroundColor = "";
         idx0 = decoder_attn[idx]['top_index'][0]
         el0 = decoderTokens[idx0]
@@ -216,6 +224,62 @@ function showCrossAttFun(attn_scores, decoder_attn) {
     }
     targetTokens.forEach((el, idx) => {
         el.addEventListener("mouseover", () => {
             onTgtHover(el, idx)
@@ -230,13 +294,25 @@ function showCrossAttFun(attn_scores, decoder_attn) {
     decoderTokens.forEach((el, idx) => {
         el.addEventListener("mouseover", () => {
-            onSelfHover(el, idx)
         })
     });
     decoderTokens.forEach((el, idx) => {
         el.addEventListener("mouseout", () => {
-            outSelfHover(el, idx)
         })
     });
 }
@@ -269,6 +345,7 @@ with gr.Blocks(css=css) as demo:
     cross_attn = gr.JSON(value=[], visible=False)
     decoder_attn = gr.JSON(value=[], visible=False)
     gr.Markdown(
         """
@@ -281,6 +358,18 @@ with gr.Blocks(css=css) as demo:
     with gr.Row(elem_classes="output-html-row"):
         output_html = gr.HTML(label="Cross Attention", elem_classes="output-html")
     gr.Markdown(
         """
             ## Check Self Attentions for Decoder
@@ -293,9 +382,9 @@ with gr.Blocks(css=css) as demo:
     with gr.Row(elem_classes="output-html-row"):
         decoder_output_html = gr.HTML(label="Decoder Attention)", elem_classes="output-html")
-    translate_button.click(fn=translate_text, inputs=input_box, outputs=[output_box, output_html, cross_attn, decoder_output_html, decoder_attn])
-    output_box.change(None, [cross_attn, decoder_attn], None, js=js)
     gr.Markdown("**Note:** I'm using a transformer model of encoder-decoder architecture (`Helsinki-NLP/opus-mt-en-zh`) in order to obtain cross attention from the decoder layers. ",
                 elem_classes="note-text")

 import torch
 from torch import nn
 import gradio as gr
+from utils import save_data, get_attn_list, get_top_attns, get_encoder_attn_list
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
     avg_decoder_attn_list = get_attn_list(translated.decoder_attentions, layer_index)
     decoder_attn_scores = get_top_attns(avg_decoder_attn_list)
+    avg_encoder_attn_list = get_encoder_attn_list(translated.encoder_attentions, layer_index)
+    encoder_attn_scores = get_top_attns(avg_encoder_attn_list)
     # save_data(outputs, src_tokens, tgt_tokens, attn_scores)
+    return outputs, render_cross_attn_html(src_tokens, tgt_tokens), cross_attn_scores, render_encoder_decoder_attn_html(tgt_tokens, "Output"), decoder_attn_scores, render_encoder_decoder_attn_html(src_tokens, "Input"), encoder_attn_scores
 def render_cross_attn_html(src_tokens, tgt_tokens):
 def render_encoder_decoder_attn_html(tokens, type):
     # Build HTML for source and target tokens
     tokens_html = ""
+    className = "decoder"
+    if type == "Input":
+        className = "encoder"
     for i, token in enumerate(tokens):
+        tokens_html += f'<span class="token {className}-token" data-index="{i}">{token}</span> '
     html = f"""
         <div class="tgt-token-wrapper-text">{type} Tokens</div>
         <div class="tgt-token-wrapper">{tokens_html}</div>
+        <div class="scores"><span class="score-1 {className}-score"></span><span class="score-2 {className}-score"></span><span class="score-3 {className}-score"></span><div>
         """
     return html
 .output-html {padding-top: 1rem; padding-bottom: 1rem;}
 .output-html-row {margin-bottom: .5rem; border: var(--block-border-width) solid var(--block-border-color); border-radius: var(--block-radius);}
 .token {padding: .5rem; border-radius: 5px;}
+.token {cursor: pointer;}
 .tgt-token-wrapper {line-height: 2.5rem; padding: .5rem;}
 .src-token-wrapper {line-height: 2.5rem; padding: .5rem;}
 .src-token-wrapper-text {position: absolute; bottom: .75rem; color: #71717a;}
 """
 js = """
+function showCrossAttFun(attn_scores, decoder_attn, encoder_attn) {
     const scrTokens = document.querySelectorAll('.src-token');
     const srcLen = scrTokens.length - 1
     const targetTokens = document.querySelectorAll('.tgt-token');
     const scores = document.querySelectorAll('.score');
     const decoderTokens = document.querySelectorAll('.decoder-token');
     const decLen = decoderTokens.length - 1
     const decoderScores = document.querySelectorAll('.decoder-score');
+    const encoderTokens = document.querySelectorAll('.encoder-token');
+    const encLen = encoderTokens.length - 1
+    const encoderScores = document.querySelectorAll('.encoder-score');
     function onTgtHover(event, idx) {
         event.style.backgroundColor = "#C6E6E6";
         scores[2].style.display = "none";
     }
+    function onDecodeHover(event, idx) {
         idx0 = decoder_attn[idx]['top_index'][0]
         if (idx0 < decLen) {
             el0 = decoderTokens[idx0]
         }
         for (i=idx+1; i < decoderTokens.length; i++) {
+            decoderTokens[i].style.color = "#ccc9c9";
         }
     }
+    function outDecodeHover(event, idx) {
         event.style.backgroundColor = "";
         idx0 = decoder_attn[idx]['top_index'][0]
         el0 = decoderTokens[idx0]
     }
+    function onEncodeHover(event, idx) {
+        idx0 = encoder_attn[idx]['top_index'][0]
+        if (idx0 < encLen) {
+            el0 = encoderTokens[idx0]
+            el0.style.backgroundColor = "#89C6C6"
+            encoderScores[0].textContent = encoder_attn[idx]['top_values'][0]
+            encoderScores[0].style.display = "initial"
+            encoderScores[0].style.backgroundColor = "#89C6C6"
+        }
+        idx1 = encoder_attn[idx]['top_index'][1]
+        if (idx1 < encLen) {
+            el1 = encoderTokens[idx1]
+            el1.style.backgroundColor = "#C6E6E6"
+            encoderScores[1].textContent = encoder_attn[idx]['top_values'][1]
+            encoderScores[1].style.display = "initial"
+            encoderScores[1].style.backgroundColor = "#C6E6E6"
+        }
+        idx2 = encoder_attn[idx]['top_index'][2]
+        if (idx2 < encLen) {
+            el2 = encoderTokens[idx2]
+            el2.style.backgroundColor = "#E5F5F5"
+            encoderScores[2].textContent = encoder_attn[idx]['top_values'][2]
+            encoderScores[2].style.display = "initial"
+            encoderScores[2].style.backgroundColor = "#E5F5F5"
+        }
+    }
+    function outEncodeHover(event, idx) {
+        event.style.backgroundColor = "";
+        idx0 = encoder_attn[idx]['top_index'][0]
+        el0 = encoderTokens[idx0]
+        el0.style.backgroundColor = ""
+        encoderScores[0].textContent = ""
+        encoderScores[0].style.display = "none";
+        idx1 = encoder_attn[idx]['top_index'][1]
+        if (idx1 || idx1 == 0) {
+            el1 = encoderTokens[idx1]
+            el1.style.backgroundColor = ""
+            encoderScores[1].textContent = ""
+            encoderScores[1].style.display = "none";
+        }
+        idx2 = encoder_attn[idx]['top_index'][2]
+        if (idx2 || idx2 == 0) {
+            el2 = encoderTokens[idx2]
+            el2.style.backgroundColor = ""
+            encoderScores[2].textContent = ""
+            encoderScores[2].style.display = "none";
+        }
+    }
     targetTokens.forEach((el, idx) => {
         el.addEventListener("mouseover", () => {
             onTgtHover(el, idx)
     decoderTokens.forEach((el, idx) => {
         el.addEventListener("mouseover", () => {
+            onDecodeHover(el, idx)
         })
     });
     decoderTokens.forEach((el, idx) => {
         el.addEventListener("mouseout", () => {
+            outDecodeHover(el, idx)
+        })
+    });
+    encoderTokens.forEach((el, idx) => {
+        el.addEventListener("mouseover", () => {
+            onEncodeHover(el, idx)
+        })
+    });
+    encoderTokens.forEach((el, idx) => {
+        el.addEventListener("mouseout", () => {
+            outEncodeHover(el, idx)
         })
     });
 }
     cross_attn = gr.JSON(value=[], visible=False)
     decoder_attn = gr.JSON(value=[], visible=False)
+    encoder_attn = gr.JSON(value=[], visible=False)
     gr.Markdown(
         """
     with gr.Row(elem_classes="output-html-row"):
         output_html = gr.HTML(label="Cross Attention", elem_classes="output-html")
+    gr.Markdown(
+        """
+            ## Check Self Attentions for Encoder
+            Hover your mouse over an input (English) word/token to see which word/token it is self-attending to.
+            """,
+        elem_classes="output-html-desc"
+    )
+    with gr.Row(elem_classes="output-html-row"):
+        encoder_output_html = gr.HTML(label="Decoder Attention)", elem_classes="output-html")
     gr.Markdown(
         """
             ## Check Self Attentions for Decoder
     with gr.Row(elem_classes="output-html-row"):
         decoder_output_html = gr.HTML(label="Decoder Attention)", elem_classes="output-html")
+    translate_button.click(fn=translate_text, inputs=input_box, outputs=[output_box, output_html, cross_attn, decoder_output_html, decoder_attn, encoder_output_html, encoder_attn])
+    output_box.change(None, [cross_attn, decoder_attn, encoder_attn], None, js=js)
     gr.Markdown("**Note:** I'm using a transformer model of encoder-decoder architecture (`Helsinki-NLP/opus-mt-en-zh`) in order to obtain cross attention from the decoder layers. ",
                 elem_classes="note-text")

utils.py CHANGED Viewed

@@ -40,12 +40,8 @@ def get_top_attns(avg_attn_list):
-# def get_encoder_attn_list(decoder_attentions, layer_index):
-#     avg_attn_list = []
-#
-#     for i in range(len(decoder_attentions)):
-#         token_index = i  # pick a token index from the output (1 to 18)
-#         attn_tensor = decoder_attentions[token_index][layer_index]  # shape: [1, 8, 1, 24]
-#         avg_attn_list.append(attn_tensor.squeeze(0).squeeze(1).mean(0))  # shape: [24], mean across heads
-#
-#     return avg_attn_list

+def get_encoder_attn_list(encoder_attentions, layer_index):
+    attn_tensor = encoder_attentions[layer_index]
+    avg_attn_list = attn_tensor[0].mean(dim=0)
+    return avg_attn_list