Spaces:
Runtime error
Runtime error
Corrected table tokens and moved to previous chipper
Browse files
- app.py +1 -1
- logits_ngrams.py +1 -1
app.py
CHANGED
|
@@ -81,7 +81,7 @@ else:
|
|
| 81 |
st.image(image, caption='Your target document')
|
| 82 |
|
| 83 |
with st.spinner(f'Processing the document ...'):
|
| 84 |
-
pre_trained_model = "unstructuredio/chipper-fast-fine-tuning
|
| 85 |
processor = DonutProcessor.from_pretrained(pre_trained_model, token=os.environ['HF_TOKEN'])
|
| 86 |
|
| 87 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 81 |
st.image(image, caption='Your target document')
|
| 82 |
|
| 83 |
with st.spinner(f'Processing the document ...'):
|
| 84 |
+
pre_trained_model = "unstructuredio/chipper-fast-fine-tuning"
|
| 85 |
processor = DonutProcessor.from_pretrained(pre_trained_model, token=os.environ['HF_TOKEN'])
|
| 86 |
|
| 87 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
logits_ngrams.py
CHANGED
|
@@ -59,5 +59,5 @@ def _calc_banned_tokens(prev_input_ids, num_hypos, no_repeat_ngram_size, cur_len
|
|
| 59 |
|
| 60 |
|
| 61 |
def get_table_token_ids(processor):
|
| 62 |
-
skip_tokens = {token_id for token, token_id in processor.tokenizer.get_added_vocab().items() if
|
| 63 |
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
def get_table_token_ids(processor):
|
| 62 |
+
skip_tokens = {token_id for token, token_id in processor.tokenizer.get_added_vocab().items() if token.startswith("<t") or token.startswith("</t") }
|
| 63 |
|