| --- |
| tags: |
| - autotrain |
| - summarization |
| language: |
| - en |
| widget: |
| - text: > |
| class Solution(object): |
| def isValid(self, s): |
| stack = [] |
| mapping = {")": "(", "}": "{", "]": "["} |
| for char in s: |
| if char in mapping: |
| top_element = stack.pop() if stack else '#' |
| if mapping[char] != top_element: |
| return False |
| else: |
| stack.append(char) |
| return not stack |
| datasets: |
| - sagard21/autotrain-data-code-explainer |
| co2_eq_emissions: |
| emissions: 5.393079045128973 |
| license: mit |
| pipeline_tag: summarization |
| --- |
| |
| # Model Trained Using AutoTrain |
|
|
| - Problem type: Summarization |
| - Model ID: 2745581349 |
| - CO2 Emissions (in grams): 5.3931 |
|
|
| # Model Description |
|
|
| This model is an attempt to simplify code understanding by generating a line-by-line explanation of source code. This model was fine-tuned using the Salesforce/codet5-large model. Currently it is trained on a small subset of Python snippets. |
|
|
| # Model Usage |
|
|
| ```py |
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    AutoConfig,
    pipeline,
)

model_name = "ashwinR/CodeExplainer"

# Load the tokenizer, model and config once so the pipeline can reuse them.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding=True)

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

config = AutoConfig.from_pretrained(model_name)

# Switch to inference mode (disables dropout etc.).
model.eval()

# Pass the already-loaded model object here. Passing `model_name` (a string)
# would make the pipeline reload the weights from scratch, discarding the
# `model` loaded above and the `model.eval()` call.
pipe = pipeline("summarization", model=model, config=config, tokenizer=tokenizer)

raw_code = """
def preprocess(text: str) -> str:
    text = str(text)
    text = text.replace("\n", " ")
    tokenized_text = text.split(" ")
    preprocessed_text = " ".join([token for token in tokenized_text if token])

    return preprocessed_text
"""

# The pipeline returns a list of dicts; the explanation is under "summary_text".
print(pipe(raw_code)[0]["summary_text"])
| ``` |
|
|
| ## Validation Metrics |
|
|
| - Loss: 2.156 |
| - Rouge1: 29.375 |
| - Rouge2: 18.128 |
| - RougeL: 25.445 |
| - RougeLsum: 28.084 |
| - Gen Len: 19.000 |
|
|