Spaces:

Rogersurf
/

hrhub

Sleeping

hrhub / pages /2_🏢_Company_View_v1.py

Roger Surf

feat: add heatmap + bilateral fairness visualization + mathematical proof section

96a706d 11 days ago

22.7 kB

	"""
	HRHUB V2.1 - Company View
	Dynamic company-to-candidate matching interface
	"""

	import streamlit as st
	import sys
	from pathlib import Path
	import re

	# Add parent directory to path for imports
	parent_dir = Path(__file__).parent.parent
	sys.path.append(str(parent_dir))

	from config import *
	from data.data_loader import (
	load_embeddings,
	# find_top_matches_company # Function doesn't exist yet - using embedded version below
	)
	from hrhub_project.utils.display_v2 import (
	# display_company_profile, # May not exist - using basic version below
	# display_candidate_card, # May not exist - using basic version below
	# display_match_table_candidates, # May not exist - using basic version below
	# display_stats_overview_company # May not exist - using basic version below
	display_candidate_profile, # Reuse from candidate view
	display_company_card, # Reuse from candidate view
	display_match_table, # Reuse from candidate view
	display_stats_overview # Reuse from candidate view
	)
	from utils.visualization import create_network_graph
	import streamlit.components.v1 as components
	import numpy as np


	def configure_page():
	    """Apply the page-level Streamlit settings and inject the custom stylesheet.

	    First calls ``st.set_page_config`` (title, icon, wide layout, expanded
	    sidebar), then renders one ``<style>`` block through ``st.markdown`` with
	    ``unsafe_allow_html=True`` so the CSS classes used by the other sections
	    of this page (titles, section headers, info/success/warning boxes) exist.
	    """
	    page_options = {
	        "page_title": "HRHUB - Company View",
	        "page_icon": "🏢",
	        "layout": "wide",
	        "initial_sidebar_state": "expanded",
	    }
	    st.set_page_config(**page_options)

	    # Stylesheet for the whole page; kept as a single literal so it is sent
	    # to the browser in one markdown call.
	    stylesheet = """
	<style>
	/* Main title styling */
	.main-title {
	font-size: 2.5rem;
	font-weight: bold;
	text-align: center;
	color: #667eea;
	margin-bottom: 0;
	}

	.sub-title {
	font-size: 1rem;
	text-align: center;
	color: #666;
	margin-top: 0;
	margin-bottom: 1.5rem;
	}

	/* Section headers */
	.section-header {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 12px;
	border-radius: 8px;
	margin: 15px 0;
	font-size: 1.3rem;
	font-weight: bold;
	}

	/* Info boxes */
	.info-box {
	background-color: #FFF4E6;
	border-left: 5px solid #FF9800;
	padding: 12px;
	border-radius: 5px;
	margin: 10px 0;
	}

	/* Success box */
	.success-box {
	background-color: #D4EDDA;
	border-left: 5px solid #28A745;
	padding: 12px;
	border-radius: 5px;
	margin: 10px 0;
	color: #155724;
	}

	/* Warning box */
	.warning-box {
	background-color: #FFF3CD;
	border-left: 5px solid #FFC107;
	padding: 12px;
	border-radius: 5px;
	margin: 10px 0;
	color: #856404;
	}

	/* Metric cards */
	div[data-testid="metric-container"] {
	background-color: #F8F9FA;
	border: 2px solid #E0E0E0;
	padding: 12px;
	border-radius: 8px;
	}

	/* Expander styling */
	.streamlit-expanderHeader {
	background-color: #F0F2F6;
	border-radius: 5px;
	}

	/* Hide Streamlit branding */
	#MainMenu {visibility: hidden;}
	footer {visibility: hidden;}

	/* Input field styling */
	.stTextInput > div > div > input {
	font-size: 1.1rem;
	font-weight: 600;
	}
	</style>
	"""
	    st.markdown(stylesheet, unsafe_allow_html=True)


	def validate_company_input(input_str):
	    """Classify raw user input as a numeric company ID or a name search term.

	    Args:
	        input_str: Text typed by the user. ``None`` is treated as empty and
	            non-string values are converted with ``str()`` first.

	    Returns:
	        A ``(is_valid, value, error_message)`` tuple:

	        * ``(False, None, message)`` when the input is empty or contains
	          only whitespace;
	        * ``(True, int, None)`` when the trimmed input consists solely of
	          decimal digits (it is then a company ID);
	        * ``(True, str, None)`` otherwise, with the trimmed text to be used
	          as a name search term.
	    """
	    cleaned = "" if input_str is None else str(input_str).strip()

	    # Trim before the emptiness check so that "   " is rejected instead of
	    # being passed on as an empty search term.
	    if not cleaned:
	        return False, None, "Please enter a company ID or name"

	    # isdecimal() rather than isdigit(): isdigit() also accepts characters
	    # such as "²" that int() cannot parse and would raise ValueError on.
	    if cleaned.isdecimal():
	        return True, int(cleaned), None

	    # Anything else is searched by name.
	    return True, cleaned, None


	def find_company_by_name(companies_df, search_term):
	    """Find the first company whose name contains ``search_term``.

	    The comparison is case-insensitive and literal: characters such as
	    ``+`` or ``(`` in the search term are matched as-is rather than being
	    interpreted as a regular expression.

	    Args:
	        companies_df: DataFrame of companies; searched only if it has a
	            ``'name'`` column. Missing names never match.
	        search_term: Text to look for inside the company name.

	    Returns:
	        ``(found, company_id, company_name)``. On success ``company_id`` is
	        the zero-based row position of the first matching company, which is
	        how the rest of this page addresses companies (``.iloc`` and the
	        embedding matrix row). On failure, or for an empty or
	        whitespace-only term, returns ``(False, None, None)``.
	    """
	    needle = search_term.strip().lower() if isinstance(search_term, str) else ''

	    # An empty needle is contained in every name; treat it as "no match"
	    # instead of silently returning the first company.
	    if not needle or 'name' not in companies_df.columns:
	        return False, None, None

	    names = companies_df['name']
	    hit_mask = names.str.lower().str.contains(needle, regex=False, na=False)
	    hit_positions = np.flatnonzero(hit_mask.to_numpy(dtype=bool))

	    if hit_positions.size == 0:
	        return False, None, None

	    # Row position, not index label: callers index positionally.
	    position = int(hit_positions[0])
	    return True, position, names.iloc[position]


	def find_top_candidate_matches(company_id, company_embeddings, candidate_embeddings, candidates_df, top_k=10):
	    """Rank candidates by cosine similarity to one company's embedding.

	    Args:
	        company_id: Row of ``company_embeddings`` holding the company vector.
	        company_embeddings: Indexable collection of company vectors.
	        candidate_embeddings: 2-D array with one candidate vector per row.
	        candidates_df: Unused here; kept so existing callers keep working.
	        top_k: Maximum number of matches to return. Values ``<= 0`` (or
	            ``None``) yield an empty list.

	    Returns:
	        A list of ``{'candidate_id': int, 'score': float}`` dicts ordered by
	        descending score, where ``candidate_id`` is the candidate's row in
	        ``candidate_embeddings``. Pairs involving a zero-length (or
	        non-finite-norm) vector score ``0.0`` instead of NaN.
	    """
	    if top_k is None or top_k <= 0:
	        return []

	    candidate_matrix = np.asarray(candidate_embeddings)
	    if candidate_matrix.size == 0:
	        return []

	    company_vec = np.asarray(company_embeddings[company_id]).reshape(-1)

	    # cos(a, b) = a.b / (|a| |b|). The division is done only where the
	    # denominator is positive: dividing by a zero norm would yield NaN, and
	    # NaN sorts to the end of argsort, i.e. to the *top* once reversed.
	    dot_products = (candidate_matrix @ company_vec).astype(float)
	    denominators = np.linalg.norm(candidate_matrix, axis=1) * np.linalg.norm(company_vec)
	    similarities = np.zeros_like(dot_products)
	    np.divide(dot_products, denominators, out=similarities, where=denominators > 0)

	    # Highest similarity first, truncated to the requested count.
	    best_rows = np.argsort(similarities)[::-1][:top_k]

	    return [
	        {'candidate_id': int(row), 'score': float(similarities[row])}
	        for row in best_rows
	    ]


	def display_company_profile_basic(company_data, company_id):
	    """Render a minimal company profile with Streamlit widgets.

	    Args:
	        company_data: Mapping-like company record (supports ``in``,
	            ``[]`` and ``.get``), e.g. one row of the companies DataFrame.
	        company_id: Identifier shown at the top of the profile.

	    Shows the ID, then the name and industry when those fields exist, a
	    collapsed description preview capped at 500 characters, and finally a
	    status line derived from ``has_job_postings`` (treated as true when the
	    field is absent).
	    """
	    st.markdown(f"Company ID: {company_id}")

	    for field, label in (('name', 'Name'), ('industry', 'Industry')):
	        if field in company_data:
	            st.markdown(f"{label}: {company_data[field]}")

	    # Only open the expander for a real, non-empty string: a missing value
	    # such as NaN is truthy and would otherwise produce an empty expander.
	    description = company_data['description'] if 'description' in company_data else None
	    if isinstance(description, str) and description:
	        with st.expander("📄 Description", expanded=False):
	            preview = description[:500] + ('...' if len(description) > 500 else '')
	            st.write(preview)

	    # Show job posting status if available
	    if company_data.get('has_job_postings', True):
	        st.success("✅ Has job postings")
	    else:
	        st.info("🔄 Collaborative filtering")


	def display_candidate_card_basic(candidate_data, candidate_id, score, rank):
	    """Render one candidate as an expandable card.

	    Args:
	        candidate_data: Mapping-like candidate record (supports ``in`` and
	            ``[]``), e.g. one row of the candidates DataFrame.
	        candidate_id: Identifier shown in the card title and body.
	        score: Match score, formatted as a percentage.
	        rank: 1-based position in the result list; the first three cards
	            start expanded.

	    The left column lists the ID, the score and truncated previews of the
	    ``skills`` (200 chars) and ``career_objective`` (150 chars) fields. The
	    right column shows a quality badge: ``>= 0.7`` excellent, ``>= 0.6``
	    very good, otherwise good.
	    """
	    # (field, heading, preview length) for the optional free-text sections.
	    text_sections = (
	        ('skills', "Skills:", 200),
	        ('career_objective', "Career Objective:", 150),
	    )

	    with st.expander(f"#{rank} - Candidate {candidate_id} - {score:.1%}", expanded=(rank <= 3)):
	        details_col, badge_col = st.columns([2, 1])

	        with details_col:
	            st.markdown(f"Candidate ID: {candidate_id}")
	            st.markdown(f"Match Score: {score:.1%}")

	            for field, heading, limit in text_sections:
	                value = candidate_data[field] if field in candidate_data else None
	                # Print the heading only when there is text to follow it;
	                # a missing value such as NaN is truthy but not a string.
	                if isinstance(value, str) and value:
	                    st.markdown(heading)
	                    st.write(value[:limit] + ('...' if len(value) > limit else ''))

	        with badge_col:
	            # Match quality badge
	            if score >= 0.7:
	                st.success("🔥 Excellent")
	            elif score >= 0.6:
	                st.info("✨ Very Good")
	            else:
	                st.warning("✅ Good")


	def render_sidebar():
	    """Draw the sidebar and report the settings currently selected in it.

	    The sidebar contains, top to bottom: a title, the match-count slider,
	    the minimum-score slider, the view-mode radio, three collapsed help
	    expanders, a button that switches to ``app.py``, and version captions.

	    Returns:
	        ``(top_k, min_score, view_mode)`` as chosen in the widgets.
	    """
	    view_choices = ["📊 Overview", "🔍 Detailed Cards", "📈 Table View"]

	    # (expander title, markdown body) pairs, rendered in this order.
	    help_panels = (
	        ("ℹ️ About", """
	Company View helps you discover top talent based on:

	- 🤖 NLP Embeddings: 384-dimensional semantic space
	- 📊 Cosine Similarity: Scale-invariant matching
	- 🌉 Job Postings Bridge: Vocabulary alignment

	How it works:
	1. Enter company ID or search by name
	2. System finds top candidate matches
	3. Explore candidates with scores and skills
	4. Visualize talent network via graph
	"""),
	        ("📚 Input Format", """
	Valid formats:
	- `9418` → Company ID 9418
	- `30989` → Company ID 30989
	- `Anblicks` → Search by name
	- `iO Associates` → Partial name search

	Search tips:
	- Case-insensitive
	- Partial matches work
	- Returns first match found
	"""),
	        ("📊 Coverage Info", """
	Company Coverage:
	- 🟢 30,000 companies with job postings
	- 🟡 120,000 companies via collaborative filtering
	- 📈 5x coverage expansion through skill inference

	Companies without job postings inherit skills from similar companies.
	"""),
	    )

	    with st.sidebar:
	        # Heading
	        st.markdown("### 🏢 Company Matching")
	        st.markdown("---")

	        # Matching controls
	        st.markdown("### ⚙️ Settings")

	        match_count = st.slider(
	            "Number of Matches",
	            min_value=5,
	            max_value=20,
	            value=DEFAULT_TOP_K,
	            step=5,
	            help="Select how many top candidates to display",
	        )

	        score_floor = st.slider(
	            "Minimum Match Score",
	            min_value=0.0,
	            max_value=1.0,
	            value=MIN_SIMILARITY_SCORE,
	            step=0.05,
	            help="Filter candidates below this similarity score",
	        )

	        st.markdown("---")

	        # Presentation of the result list
	        st.markdown("### 👀 View Mode")
	        selected_view = st.radio(
	            "Select view:",
	            view_choices,
	            help="Choose how to display candidate matches",
	        )

	        st.markdown("---")

	        # Collapsed help texts
	        for panel_title, panel_body in help_panels:
	            with st.expander(panel_title, expanded=False):
	                st.markdown(panel_body)

	        st.markdown("---")

	        # Navigation back to the landing page
	        if st.button("🏠 Back to Home", use_container_width=True):
	            st.switch_page("app.py")

	        # Footer
	        st.caption(f"Version: {VERSION}")
	        st.caption("© 2024 HRHUB Team")

	    return match_count, score_floor, selected_view


	def get_network_graph_data_company(company_id, matches, companies_df):
	    """Build node and edge dicts for the company-centred match graph.

	    Args:
	        company_id: Zero-based row position of the company in
	            ``companies_df``.
	        matches: Iterable of ``(candidate_id, score, candidate_data)``
	            tuples; ``candidate_data`` is not used here.
	        companies_df: DataFrame of companies; its ``'name'`` column, when
	            present, supplies the company label.

	    Returns:
	        ``{'nodes': [...], 'edges': [...]}``. The first node is the company
	        (box, id ``COMP<id>``), followed by one dot node per candidate
	        (id ``C<id>``). Each edge links the company to a candidate, with
	        ``value`` equal to ten times the score and ``title`` holding the
	        score to three decimals.
	    """
	    fallback_label = f'Company {company_id}'
	    company_label = companies_df.iloc[company_id].get('name', fallback_label)

	    # Names can be missing (NaN) or otherwise non-textual in the data; fall
	    # back to a generic label instead of failing on len() below.
	    if not isinstance(company_label, str) or not company_label.strip():
	        company_label = fallback_label

	    # Keep the label short enough for the graph node.
	    if len(company_label) > 30:
	        company_label = company_label[:27] + '...'

	    company_node_id = f'COMP{company_id}'

	    # Company node (red box)
	    nodes = [{
	        'id': company_node_id,
	        'label': company_label,
	        'color': '#ff6b6b',
	        'shape': 'box',
	        'size': 30
	    }]
	    edges = []

	    # Candidate nodes (green dots), each connected to the company
	    for cand_id, score, _cand_data in matches:
	        candidate_node_id = f'C{cand_id}'
	        nodes.append({
	            'id': candidate_node_id,
	            'label': f'Candidate #{cand_id}',
	            'color': '#4ade80',
	            'shape': 'dot',
	            'size': 20
	        })
	        edges.append({
	            'from': company_node_id,
	            'to': candidate_node_id,
	            'value': float(score) * 10,
	            'title': f'Match Score: {score:.3f}'
	        })

	    return {'nodes': nodes, 'edges': edges}


	def render_network_section(company_id: int, matches, companies_df):
	    """Show the "Talent Network" section: header, explanation, graph, help.

	    Args:
	        company_id: Company at the centre of the graph.
	        matches: ``(candidate_id, score, candidate_data)`` tuples to plot.
	        companies_df: Companies DataFrame, forwarded to
	            ``get_network_graph_data_company``.

	    The graph data is turned into HTML by ``create_network_graph`` and
	    embedded with ``components.html`` while a spinner is displayed.
	    """
	    st.markdown('<div class="section-header">🕸️ Talent Network</div>', unsafe_allow_html=True)

	    # Short explanation of what the graph encodes
	    st.markdown("""
	<div class="info-box">
	<strong>💡 What this shows:</strong> Talent network reveals skill alignment and candidate clustering.
	Thicker edges indicate stronger semantic match between company requirements and candidate skills.
	</div>
	""", unsafe_allow_html=True)

	    with st.spinner("Generating interactive network graph..."):
	        graph = get_network_graph_data_company(company_id, matches, companies_df)

	        # Render the nodes/edges to a standalone HTML document
	        graph_html = create_network_graph(
	            nodes=graph['nodes'],
	            edges=graph['edges'],
	            height="600px",
	        )

	        # Embed it; the frame is slightly taller than the graph itself
	        components.html(graph_html, height=620, scrolling=False)

	    # Usage hints and legend
	    with st.expander("📖 Graph Controls", expanded=False):
	        st.markdown("""
	How to interact:

	- 🖱️ Drag nodes: Click and drag to reposition
	- 🔍 Zoom: Scroll to zoom in/out
	- 👆 Pan: Click background and drag to pan
	- 🎯 Hover: Hover over nodes/edges for details

	Legend:
	- 🔴 Red square: Your company
	- 🟢 Green circles: Matched candidates
	- Line thickness: Match strength (thicker = better)
	""")


	def render_matches_section(matches, view_mode: str):
	    """Show the "Candidate Matches" section in the selected presentation.

	    Args:
	        matches: List of ``(candidate_id, score, candidate_data)`` tuples,
	            already ordered best-first.
	        view_mode: One of the sidebar's view labels. The overview and table
	            views both render a ranked table followed by a tip; the
	            detailed view renders one card per candidate.
	    """
	    st.markdown('<div class="section-header">🎯 Candidate Matches</div>', unsafe_allow_html=True)

	    def quality_label(value):
	        # Same thresholds as the badge on the candidate cards.
	        if value >= 0.7:
	            return '🔥 Excellent'
	        if value >= 0.6:
	            return '✨ Very Good'
	        return '✅ Good'

	    if view_mode in ("📊 Overview", "📈 Table View"):
	        # Tabular presentation
	        if len(matches) > 0:
	            import pandas as pd

	            rows = [
	                {
	                    'Rank': f'#{position}',
	                    'Candidate ID': cand_id,
	                    'Score': f'{score:.1%}',
	                    'Match Quality': quality_label(score),
	                }
	                for position, (cand_id, score, _cand_data) in enumerate(matches, 1)
	            ]

	            st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)

	        # Reading aid shown under the table
	        st.info("💡 Tip: Scores above 0.6 indicate strong alignment between candidate skills and company requirements!")

	    elif view_mode == "🔍 Detailed Cards":
	        # One expandable card per candidate
	        for position, (cand_id, score, cand_data) in enumerate(matches, 1):
	            display_candidate_card_basic(cand_data, cand_id, score, position)


	def main():
	    """Run the Company View page from top to bottom.

	    Steps, in order:

	    1. Configure the page, draw the header and the sidebar (which yields
	       ``top_k``, ``min_score`` and ``view_mode``).
	    2. Load embeddings and DataFrames once per session via
	       ``load_embeddings`` and keep them in ``st.session_state``.
	    3. Read the company ID or name from the text input and resolve it to a
	       company row; stop the script with a message when it cannot be
	       resolved.
	    4. Compute the top candidate matches, drop those below ``min_score``
	       and stop with a warning when none remain.
	    5. Render summary metrics, the company profile, the match list, the
	       network graph and a technical-details expander.
	    """
	    import html  # local import: only needed to escape text placed in raw HTML

	    # Configure page
	    configure_page()

	    # Render header
	    st.markdown('<h1 class="main-title">🏢 Company View</h1>', unsafe_allow_html=True)
	    st.markdown('<p class="sub-title">Discover top talent for your company</p>', unsafe_allow_html=True)

	    # Render sidebar and get settings
	    top_k, min_score, view_mode = render_sidebar()

	    st.markdown("---")

	    # Load embeddings (cache in session state)
	    if 'embeddings_loaded' not in st.session_state:
	        with st.spinner("📄 Loading embeddings and data..."):
	            try:
	                cand_emb, comp_emb, cand_df, comp_df = load_embeddings()
	                st.session_state.embeddings_loaded = True
	                st.session_state.candidate_embeddings = cand_emb
	                st.session_state.company_embeddings = comp_emb
	                st.session_state.candidates_df = cand_df
	                st.session_state.companies_df = comp_df

	                st.markdown("""
	<div class="success-box">
	✅ Data loaded successfully! Ready to find talent.
	</div>
	""", unsafe_allow_html=True)
	            except Exception as e:
	                # Page-level boundary: report the failure and halt this run.
	                st.error(f"❌ Error loading data: {str(e)}")
	                st.stop()

	    companies_df = st.session_state.companies_df
	    candidates_df = st.session_state.candidates_df

	    # Company input section
	    st.markdown("### 🔍 Enter Company ID or Name")

	    input_col, button_col = st.columns([3, 1])

	    with input_col:
	        company_input = st.text_input(
	            "Company ID or Name",
	            value="9418",
	            max_chars=100,
	            help="Enter company ID (e.g., 9418) or search by name (e.g., Anblicks)",
	            label_visibility="collapsed"
	        )

	    with button_col:
	        # The return value is not needed: the code below does not branch on
	        # the click, so the result is intentionally not stored.
	        st.button("🚀 Find Candidates", use_container_width=True, type="primary")

	    # Validate input
	    is_valid, company_id_or_search, error_msg = validate_company_input(company_input)

	    if not is_valid:
	        st.warning(f"⚠️ {error_msg}")
	        st.stop()

	    # Determine if it's ID or search
	    if isinstance(company_id_or_search, int):
	        # Direct ID
	        company_id = company_id_or_search

	        # Check if company exists
	        if company_id >= len(companies_df):
	            st.error(f"❌ Company ID {company_id} not found. Maximum ID: {len(companies_df) - 1}")
	            st.stop()

	        company = companies_df.iloc[company_id]
	        company_name = company.get('name', f'Company {company_id}')

	    else:
	        # Search by name
	        found, company_id, company_name = find_company_by_name(companies_df, company_id_or_search)

	        if not found:
	            st.error(f"❌ No company found matching: '{company_id_or_search}'")
	            st.info("💡 Tip: Try searching with partial name or use company ID directly")
	            st.stop()

	        company = companies_df.iloc[company_id]
	        st.success(f"✅ Found: {company_name} (ID: {company_id})")

	    # Show company info. The name comes from the dataset and is inserted
	    # into raw HTML, so it is escaped to keep characters such as "<" or "&"
	    # from being interpreted as markup.
	    safe_company_name = html.escape(str(company_name))
	    st.markdown(f"""
	<div class="info-box">
	<strong>Selected:</strong> {safe_company_name} (ID: {company_id}) |
	<strong>Total companies in system:</strong> {len(companies_df):,}
	</div>
	""", unsafe_allow_html=True)

	    # Check if company has job postings (assumed true when the field is absent)
	    has_postings = company.get('has_job_postings', True)

	    if not has_postings:
	        st.markdown("""
	<div class="warning-box">
	ℹ️ <strong>Note:</strong> This company uses <strong>collaborative filtering</strong>
	(skills inherited from similar companies). Matching still works but may be less precise than companies with direct job postings.
	</div>
	""", unsafe_allow_html=True)

	    # Find matches
	    with st.spinner("🔄 Finding top candidate matches..."):
	        matches_list = find_top_candidate_matches(
	            company_id,
	            st.session_state.company_embeddings,
	            st.session_state.candidate_embeddings,
	            candidates_df,
	            top_k
	        )

	    # Format matches for display, keeping only those at or above the
	    # minimum score chosen in the sidebar.
	    matches = [
	        (m['candidate_id'], m['score'], candidates_df.iloc[m['candidate_id']])
	        for m in matches_list
	        if m['score'] >= min_score
	    ]

	    if not matches:
	        st.warning(f"⚠️ No candidates found above {min_score:.0%} threshold. Try lowering the minimum score in the sidebar.")
	        st.stop()

	    st.markdown("---")

	    # Display statistics
	    scores = [score for _, score, _ in matches]
	    total_col, average_col, excellent_col, best_col = st.columns(4)

	    with total_col:
	        st.metric("Total Matches", len(matches))

	    with average_col:
	        avg_score = sum(scores) / len(scores)
	        st.metric("Average Score", f"{avg_score:.1%}")

	    with excellent_col:
	        excellent = sum(1 for score in scores if score >= 0.7)
	        st.metric("Excellent Matches", excellent)

	    with best_col:
	        best_score = max(scores)
	        st.metric("Best Match", f"{best_score:.1%}")

	    st.markdown("---")

	    # Create two columns for layout
	    profile_col, matches_col = st.columns([1, 2])

	    with profile_col:
	        # Company profile section
	        st.markdown('<div class="section-header">🏢 Company Profile</div>', unsafe_allow_html=True)

	        # Use basic display function
	        display_company_profile_basic(company, company_id)

	    with matches_col:
	        # Matches section
	        render_matches_section(matches, view_mode)

	    st.markdown("---")

	    # Network visualization (full width)
	    render_network_section(company_id, matches, companies_df)

	    st.markdown("---")

	    # Technical info expander
	    with st.expander("🔧 Technical Details", expanded=False):
	        st.markdown(f"""
	Current Configuration:
	- Company ID: {company_id}
	- Company Name: {company_name}
	- Embedding Dimension: {EMBEDDING_DIMENSION}
	- Similarity Metric: Cosine Similarity
	- Top K Matches: {top_k}
	- Minimum Score: {min_score:.0%}
	- Candidates Available: {len(candidates_df):,}
	- Companies in System: {len(companies_df):,}

	Algorithm:
	1. Load pre-computed company embedding
	2. Calculate cosine similarity with all candidate embeddings
	3. Rank candidates by similarity score
	4. Return top-K matches above threshold

	Coverage Strategy:
	- Companies WITH job postings: Direct semantic matching
	- Companies WITHOUT postings: Collaborative filtering (inherit from similar companies)
	- Total coverage: 150K companies (5x expansion from 30K base)
	""")


	# Run the page only when this file is executed as a script, not when it is
	# imported as a module.
	if __name__ == "__main__":
	    main()