Spaces:
Running
Running
| import os | |
| import time | |
| from pathlib import Path | |
| import streamlit as st | |
| from src import bible_loader | |
| from src.embeddings import EmbeddingsManager | |
| from src.reranker import ( | |
| CombinedScoreAndNumberReranker, | |
| MaxVerseReranker, | |
| Reranker, | |
| SemanticSimScoreReranker, | |
| ) | |
| from src.retriever import Retriever, SemanticRetriever | |
| def display_chapter(chapter): | |
| st.header(f"[{str(chapter)}]({chapter.get_biblegateway_url()})") | |
| chapter_text = chapter.get_formatted_text() | |
| st.markdown(chapter_text, unsafe_allow_html=True) | |
| # st.write(chapter.highlight_verses_df) | |
| def config(): | |
| n_results = st.sidebar.slider("Maximum Results?", 5, 30, 10) | |
| # bible_version = st.sidebar.selectbox("Bible Version", ["NIV", "ESV"]) # TODO | |
| bible_version = "NIV" | |
| new_testament = st.sidebar.checkbox("Search New Testament?", True) | |
| old_testament = st.sidebar.checkbox("Search Old Testament?", False) | |
| return n_results, new_testament, old_testament, bible_version | |
| def main(): | |
| st.set_page_config(page_title="Bible Search", layout="wide") | |
| n_results, new_testament, old_testament, bible_version = config() | |
| # Config | |
| ROOT_DIR = Path(os.path.abspath(os.path.dirname(__file__))) | |
| DATA_DIR = ROOT_DIR / "data" | |
| n_candidates = n_results * 2 | |
| metadata_csv = DATA_DIR / "key_english.csv" | |
| verses_csv = DATA_DIR / f"{bible_version}.csv" | |
| semantic_sim_model = "msmarco-distilbert-base-v4" | |
| # Initialize / Index | |
| bible_df = bible_loader.load_bible(metadata_csv, verses_csv) | |
| embeddings_manager = EmbeddingsManager( | |
| model_name=semantic_sim_model, | |
| bible_version=bible_version, | |
| embeddings_cache_dir=DATA_DIR, | |
| texts=bible_df["text"].tolist(), | |
| ) | |
| # Trim down search space if needed | |
| if not new_testament: | |
| bible_df = bible_df[bible_df["testament"] != "NT"] | |
| if not old_testament: | |
| bible_df = bible_df[bible_df["testament"] != "OT"] | |
| # Initialize retriever and reranker based on filtered texts | |
| retriever = SemanticRetriever(bible_df, embeddings_manager) | |
| reranker = CombinedScoreAndNumberReranker() | |
| # reranker = SemanticSimScoreReranker() | |
| # reranker = MaxVerseReranker() | |
| # Get user input | |
| st.title("Verse Similarity Search") | |
| st.markdown( | |
| "- Have you ever been stumped by a verse and wondered what related things the Bible says about it?\n" | |
| "- Or you have a verse of interest and you simply want to find related ones?\n" | |
| "- Or you vaguely recall a verse's idea, but can't recall the exact text?\n" | |
| "This tool was made just for that!" | |
| ) | |
| st.markdown("---") | |
| demo_query = st.selectbox( | |
| "Try some demo queries...", | |
| [ | |
| "", | |
| "For God so loved the world that he gave his one and only Son, that whoever believes in him shall not perish but have eternal life.", | |
| "In the same way, faith by itself, if it is not accompanied by action, is dead.", | |
| "I tell you the truth, no one can enter the kingdom of God unless he is born of water and the Spirit.", | |
| "the Lord is patient with us, not wanting us to perish", | |
| "is it ok for believers to continue in sin?", | |
| "it is possible to resist every temptation", | |
| "heavenly rewards", | |
| "the old is gone, the new has come", | |
| "suffering for Christ", | |
| "rejoicing in trials", | |
| "Be careful of false prophets, wolves in sheep skin", | |
| "will there be marriage in heaven?", | |
| ], | |
| index=1, | |
| ) | |
| query = st.text_area( | |
| "Or type a verse's text here to find similar verses", | |
| demo_query if demo_query.strip() else "", | |
| ) | |
| clicked_search = st.button("Search", type="primary") | |
| if query or clicked_search: | |
| if len(bible_df) == 0: | |
| st.markdown( | |
| "---\n:red[Please select at least one testament to search through (left hand side of the screen). :)]" | |
| ) | |
| else: | |
| with st.spinner("Searching..."): | |
| start = time.time() | |
| # Retrieve and re-rank | |
| candidate_chapters = retriever.retrieve(query, n=n_candidates) | |
| candidate_chapters = reranker.rerank(candidate_chapters) | |
| # Trim because candidates can be more than the desired results | |
| final_chapter_results = candidate_chapters[:n_results] | |
| # Display quick stats | |
| st.markdown( | |
| f"_{len(final_chapter_results)} results found in {time.time()-start:.2f}s_" | |
| ) | |
| st.markdown("---") | |
| # Display results | |
| for chapter in final_chapter_results: | |
| display_chapter(chapter) | |
| st.markdown("---") | |
| if __name__ == "__main__": | |
| main() | |