""" HRHUB V2.1 - Company View Dynamic company-to-candidate matching interface """ import streamlit as st import sys from pathlib import Path import re # Add parent directory to path for imports parent_dir = Path(__file__).parent.parent sys.path.append(str(parent_dir)) from config import * from data.data_loader import ( load_embeddings, # find_top_matches_company # Function doesn't exist yet - using embedded version below ) from utils.display import ( display_company_profile_basic, display_candidate_card_basic, display_match_table_candidates, display_stats_overview_company ) from utils.visualization import create_network_graph from utils.viz_heatmap import render_skills_heatmap_section from utils.viz_bilateral import render_bilateral_fairness_section # NEW IMPORT import streamlit.components.v1 as components import numpy as np def configure_page(): """Configure Streamlit page settings and custom CSS.""" st.set_page_config( page_title="HRHUB - Company View", page_icon="🏢", layout="wide", initial_sidebar_state="expanded" ) # Custom CSS st.markdown(""" """, unsafe_allow_html=True) def validate_company_input(input_str): """ Validate company input (ID or search term). Returns: (is_valid, company_id, error_message) """ if not input_str: return False, None, "Please enter a company ID or name" input_clean = input_str.strip() # Check if it's a numeric ID if input_clean.isdigit(): company_id = int(input_clean) return True, company_id, None # Otherwise treat as search term (we'll search by name) return True, input_clean, None def find_company_by_name(companies_df, search_term): """ Find company by name (case-insensitive partial match). Returns: (found, company_id, company_name) """ search_lower = search_term.lower() # Search in company names if 'name' in companies_df.columns: matches = companies_df[companies_df['name'].str.lower().str.contains(search_lower, na=False)] if len(matches) > 0: # Return first match company_id = matches.index[0] company_name = matches.iloc[0]['name'] return True, company_id, company_name return False, None, None def find_top_candidate_matches(company_id, company_embeddings, candidate_embeddings, candidates_df, top_k=10): """ Find top candidate matches for a company (reverse of candidate matching). """ # Get company embedding company_emb = company_embeddings[company_id].reshape(1, -1) # Calculate cosine similarity with all candidates # Normalize embeddings company_norm = company_emb / np.linalg.norm(company_emb) candidate_norms = candidate_embeddings / np.linalg.norm(candidate_embeddings, axis=1, keepdims=True) # Compute similarities similarities = np.dot(candidate_norms, company_norm.T).flatten() # Get top K indices top_indices = np.argsort(similarities)[::-1][:top_k] # Format results matches = [] for idx in top_indices: matches.append({ 'candidate_id': int(idx), 'score': float(similarities[idx]) }) return matches def render_sidebar(): """Render sidebar with controls and information.""" with st.sidebar: # Logo/Title st.markdown("### 🏢 Company Matching") st.markdown("---") # Settings section st.markdown("### ⚙️ Settings") # Number of matches top_k = st.slider( "Number of Matches", min_value=5, max_value=20, value=DEFAULT_TOP_K, step=5, help="Select how many top candidates to display" ) # Minimum score threshold min_score = st.slider( "Minimum Match Score", min_value=0.0, max_value=1.0, value=MIN_SIMILARITY_SCORE, step=0.05, help="Filter candidates below this similarity score" ) st.markdown("---") # View mode selection st.markdown("### 👀 View Mode") view_mode = st.radio( "Select view:", ["📊 Overview", "🔍 Detailed Cards", "📈 Table View"], help="Choose how to display candidate matches" ) st.markdown("---") # Information section with st.expander("ℹ️ About", expanded=False): st.markdown(""" **Company View** helps you discover top talent based on: - 🤖 **NLP Embeddings**: 384-dimensional semantic space - 📊 **Cosine Similarity**: Scale-invariant matching - 🌉 **Job Postings Bridge**: Vocabulary alignment **How it works:** 1. Enter company ID or search by name 2. System finds top candidate matches 3. Explore candidates with scores and skills 4. Visualize talent network via graph """) with st.expander("📚 Input Format", expanded=False): st.markdown(""" **Valid formats:** - `9418` → Company ID 9418 - `30989` → Company ID 30989 - `Anblicks` → Search by name - `iO Associates` → Partial name search **Search tips:** - Case-insensitive - Partial matches work - Returns first match found """) with st.expander("📊 Coverage Info", expanded=False): st.markdown(""" **Company Coverage:** - 🟢 **30,000 companies** with job postings - 🟡 **120,000 companies** via collaborative filtering - 📈 **5x coverage expansion** through skill inference Companies without job postings inherit skills from similar companies. """) st.markdown("---") # Back to home button if st.button("🏠 Back to Home", use_container_width=True): st.switch_page("app.py") # Version info st.caption(f"Version: {VERSION}") st.caption("© 2024 HRHUB Team") return top_k, min_score, view_mode def get_network_graph_data_company(company_id, matches, companies_df): """Generate network graph data from matches (company perspective).""" nodes = [] edges = [] # Add company node (red/orange) company_name = companies_df.iloc[company_id].get('name', f'Company {company_id}') if len(company_name) > 30: company_name = company_name[:27] + '...' nodes.append({ 'id': f'COMP{company_id}', 'label': company_name, 'color': '#ff6b6b', 'shape': 'box', 'size': 30 }) # Add candidate nodes (green) and edges for cand_id, score, cand_data in matches: nodes.append({ 'id': f'C{cand_id}', 'label': f'Candidate #{cand_id}', 'color': '#4ade80', 'shape': 'dot', 'size': 20 }) edges.append({ 'from': f'COMP{company_id}', 'to': f'C{cand_id}', 'value': float(score) * 10, 'title': f'Match Score: {score:.3f}' }) return {'nodes': nodes, 'edges': edges} def render_network_section(company_id: int, matches, companies_df): """Render interactive network visualization section.""" st.markdown('

🕸️ Talent Network

', unsafe_allow_html=True) # Explanation box st.markdown("""

💡 What this shows: Talent network reveals skill alignment and candidate clustering. Thicker edges indicate stronger semantic match between company requirements and candidate skills.

""", unsafe_allow_html=True) with st.spinner("Generating interactive network graph..."): # Get graph data graph_data = get_network_graph_data_company(company_id, matches, companies_df) # Create HTML graph html_content = create_network_graph( nodes=graph_data['nodes'], edges=graph_data['edges'], height="600px" ) # Display in Streamlit components.html(html_content, height=620, scrolling=False) # Graph instructions with st.expander("📖 Graph Controls", expanded=False): st.markdown(""" **How to interact:** - 🖱️ **Drag nodes**: Click and drag to reposition - 🔍 **Zoom**: Scroll to zoom in/out - 👆 **Pan**: Click background and drag to pan - 🎯 **Hover**: Hover over nodes/edges for details **Legend:** - 🔴 **Red square**: Your company - 🟢 **Green circles**: Matched candidates - **Line thickness**: Match strength (thicker = better) """) def render_matches_section(matches, view_mode: str): """Render candidate matches section with different view modes.""" st.markdown('

🎯 Candidate Matches

', unsafe_allow_html=True) if view_mode == "📊 Overview" or view_mode == "📈 Table View": # Table view - use display function display_match_table_candidates(matches) elif view_mode == "🔍 Detailed Cards": # Card view - use display function for rank, (cand_id, score, cand_data) in enumerate(matches, 1): display_candidate_card_basic(cand_data, cand_id, score, rank) def main(): """Main application entry point.""" # Configure page configure_page() # Render header st.markdown('

🏢 Company View

', unsafe_allow_html=True) st.markdown('

Discover top talent for your company

', unsafe_allow_html=True) # Render sidebar and get settings top_k, min_score, view_mode = render_sidebar() st.markdown("---") # Load embeddings (cache in session state) if 'embeddings_loaded' not in st.session_state: with st.spinner("📄 Loading embeddings and data..."): try: cand_emb, comp_emb, cand_df, comp_df = load_embeddings() st.session_state.embeddings_loaded = True st.session_state.candidate_embeddings = cand_emb st.session_state.company_embeddings = comp_emb st.session_state.candidates_df = cand_df st.session_state.companies_df = comp_df st.markdown("""

✅ Data loaded successfully! Ready to find talent.

""", unsafe_allow_html=True) except Exception as e: st.error(f"❌ Error loading data: {str(e)}") st.stop() # Company input section st.markdown("### 🔍 Enter Company ID or Name") col1, col2 = st.columns([3, 1]) with col1: company_input = st.text_input( "Company ID or Name", value="9418", max_chars=100, help="Enter company ID (e.g., 9418) or search by name (e.g., Anblicks)", label_visibility="collapsed" ) with col2: search_button = st.button("🚀 Find Candidates", use_container_width=True, type="primary") # Validate input is_valid, company_id_or_search, error_msg = validate_company_input(company_input) if not is_valid: st.warning(f"⚠️ {error_msg}") st.stop() # Determine if it's ID or search if isinstance(company_id_or_search, int): # Direct ID company_id = company_id_or_search # Check if company exists if company_id >= len(st.session_state.companies_df): st.error(f"❌ Company ID {company_id} not found. Maximum ID: {len(st.session_state.companies_df) - 1}") st.stop() company = st.session_state.companies_df.iloc[company_id] company_name = company.get('name', f'Company {company_id}') else: # Search by name found, company_id, company_name = find_company_by_name(st.session_state.companies_df, company_id_or_search) if not found: st.error(f"❌ No company found matching: '{company_id_or_search}'") st.info("💡 **Tip:** Try searching with partial name or use company ID directly") st.stop() company = st.session_state.companies_df.iloc[company_id] st.success(f"✅ Found: **{company_name}** (ID: {company_id})") # Show company info st.markdown(f"""

Selected: {company_name} (ID: {company_id}) | Total companies in system: {len(st.session_state.companies_df):,}

""", unsafe_allow_html=True) # Check if company has job postings has_postings = company.get('has_job_postings', False) if 'has_job_postings' in company else True if not has_postings: st.markdown("""

ℹ️ Note: This company uses collaborative filtering (skills inherited from similar companies). Matching still works but may be less precise than companies with direct job postings.

""", unsafe_allow_html=True) # Find matches with st.spinner("🔄 Finding top candidate matches..."): matches_list = find_top_candidate_matches( company_id, st.session_state.company_embeddings, st.session_state.candidate_embeddings, st.session_state.candidates_df, top_k ) # Format matches for display matches = [ (m['candidate_id'], m['score'], st.session_state.candidates_df.iloc[m['candidate_id']]) for m in matches_list ] # Filter by minimum score matches = [(cid, score, cdata) for cid, score, cdata in matches if score >= min_score] if not matches: st.warning(f"⚠️ No candidates found above {min_score:.0%} threshold. Try lowering the minimum score in the sidebar.") st.stop() st.markdown("---") # Display statistics using display function display_stats_overview_company(company, matches) st.markdown("---") # Create two columns for layout col1, col2 = st.columns([1, 2]) with col1: # Company profile section st.markdown('

🏢 Company Profile

', unsafe_allow_html=True) # Use basic display function display_company_profile_basic(company, company_id) with col2: # Matches section render_matches_section(matches, view_mode) st.markdown("---") # Skills Heatmap (show for top candidate match) if len(matches) > 0: top_cand_id, top_cand_score, top_cand_data = matches[0] st.markdown("### 🔥 Skills Analysis - Top Candidate") render_skills_heatmap_section( top_cand_data, company, st.session_state.candidate_embeddings[top_cand_id], st.session_state.company_embeddings[company_id], top_cand_score ) st.markdown("---") # Network visualization (full width) render_network_section(company_id, matches, st.session_state.companies_df) st.markdown("---") # BILATERAL FAIRNESS PROOF SECTION - NEW render_bilateral_fairness_section( st.session_state.candidate_embeddings, st.session_state.company_embeddings ) st.markdown("---") # Technical info expander with st.expander("🔧 Technical Details", expanded=False): st.markdown(f""" **Current Configuration:** - Company ID: {company_id} - Company Name: {company_name} - Embedding Dimension: {EMBEDDING_DIMENSION} - Similarity Metric: Cosine Similarity - Top K Matches: {top_k} - Minimum Score: {min_score:.0%} - Candidates Available: {len(st.session_state.candidates_df):,} - Companies in System: {len(st.session_state.companies_df):,} **Algorithm:** 1. Load pre-computed company embedding 2. Calculate cosine similarity with all candidate embeddings 3. Rank candidates by similarity score 4. Return top-K matches above threshold **Coverage Strategy:** - Companies WITH job postings: Direct semantic matching - Companies WITHOUT postings: Collaborative filtering (inherit from similar companies) - Total coverage: 150K companies (5x expansion from 30K base) """) if __name__ == "__main__": main()