Spaces:

Kamyar-zeinalipour
/

CS-neda

Sleeping

App Files Files Community

CS-neda / ui.py

Kamyar-zeinalipour

Update ui.py

ef57bca verified 6 months ago

raw

history blame contribute delete

13.6 kB

	# paragraph_annotation_tool.py NEW ui
	"""
	Paragraph-level annotation tool with per-model comments
	────────────────────────────────────────────────────────────────────────────
	• Upload a CSV containing at least:
	    – a ``Content_Paragraph`` column, and
	    – a ``<model>_prompt1`` / ``<model>_prompt2`` column pair for each model.
	• Enter your annotator name and click “Start / Resume”.
	• Rate each prompt A / B / C, optionally leave a comment for every model,
	  navigate with Back / Next, and download the annotated CSV.

	New in this version
	───────────────────
	• One comment textbox per model (shown next to the model’s two ratings).
	• Comments are stored in the CSV under columns named ``comment_<model>``.
	• Blank comments are saved as the literal string ``"no comment"``.
	"""
	from __future__ import annotations
	import gradio as gr, pandas as pd
	import random, time, os, shutil, uuid
	from typing import List

	# ─── CONFIG ───────────────────────────────────────────────────────────────
	# Upper bound on model slots; the UI pre-allocates this many widget rows.
	MAX_MODELS = 10
	# Column holding the paragraph shown to the annotator.
	CONTENT_COL = "Content_Paragraph"
	# Suffixes identifying a model's two prompt-output columns.
	PROMPT1_SUFFIX = "_prompt1"
	PROMPT2_SUFFIX = "_prompt2"
	# Comment columns are named <COMMENT_PREFIX><model>.
	COMMENT_PREFIX = "comment_"
	# Column storing the per-row shuffled model order.
	PERM_COL = "perm_models"
	# The only values accepted as a rating.
	RATING_OPTS = ["A", "B", "C"]

	# ─── GLOBALS (filled after CSV load) ───────────────────────────────────────
	# NOTE: the union annotations use a plain ``|``; a backslash before it is a
	# syntax error even with postponed evaluation of annotations.
	df: pd.DataFrame | None = None        # the loaded annotation table
	csv_path: str | None = None           # working copy the table is saved to
	models: List[str] = []                # model names discovered in the CSV
	TOTAL = 0                             # number of rows in ``df``
	annotator = ""                        # name entered by the current annotator
	current_start: float | None = None    # time.time() when the current row was shown

	# ─── CSV HELPERS ───────────────────────────────────────────────────────────

	def load_csv(path: str):
	    """Read a CSV, discover its model columns and add missing helper columns.

	    A model is any column ending in ``PROMPT1_SUFFIX`` (and not starting with
	    ``rating_``); the matching ``PROMPT2_SUFFIX`` column must exist too.

	    The file is parsed and validated into local variables first; the module
	    globals ``df``, ``models``, ``TOTAL`` and ``csv_path`` are only replaced
	    once everything checks out, so a rejected upload cannot leave the app in
	    a half-loaded state.

	    Args:
	        path: Location of the CSV file to load.

	    Raises:
	        ValueError: if the content column is missing, a ``*_prompt1`` column
	            has no ``*_prompt2`` partner, no model columns exist, or there
	            are more models than ``MAX_MODELS``.
	    """
	    global df, models, TOTAL, csv_path

	    # keep_default_na=False keeps empty cells as "" instead of NaN, which is
	    # what the "not yet rated" checks elsewhere compare against.
	    frame = pd.read_csv(path, keep_default_na=False)

	    if CONTENT_COL not in frame.columns:
	        raise ValueError(f"No '{CONTENT_COL}' column found")

	    found: List[str] = []
	    for col in frame.columns:
	        if col.endswith(PROMPT1_SUFFIX) and not col.startswith("rating_"):
	            m = col[: -len(PROMPT1_SUFFIX)]
	            if f"{m}{PROMPT2_SUFFIX}" not in frame.columns:
	                raise ValueError(f"Found '{col}' but no '{m}{PROMPT2_SUFFIX}'")
	            found.append(m)

	    if not found:
	        raise ValueError(f"No '*{PROMPT1_SUFFIX}' columns found")
	    if len(found) > MAX_MODELS:
	        raise ValueError(
	            f"CSV has {len(found)} models but MAX_MODELS is {MAX_MODELS}")

	    # helper columns
	    if PERM_COL not in frame.columns:
	        frame[PERM_COL] = ""
	    for m in found:
	        # rating columns per prompt
	        for p in ("prompt1", "prompt2"):
	            rc = f"rating_{m}__{p}"
	            if rc not in frame.columns:
	                frame[rc] = ""
	        # comment column per model
	        cc = f"{COMMENT_PREFIX}{m}"
	        if cc not in frame.columns:
	            frame[cc] = "no comment"  # default value

	    for col in ("annotator", "annotation_time"):
	        if col not in frame.columns:
	            frame[col] = "" if col == "annotator" else 0.0

	    # Everything validated: publish the new state in one go.
	    csv_path = path
	    df = frame
	    TOTAL = len(frame)
	    models[:] = found  # in-place so existing references to the list stay valid


	# ─── BOOK-KEEPING ──────────────────────────────────────────────────────────

	def first_incomplete() -> int:
	    """Return the index of the first row that still has an empty rating.

	    A row counts as incomplete when any of its ``rating_<model>__prompt1`` /
	    ``rating_<model>__prompt2`` cells is the empty string. Falls back to 0
	    when every row is fully rated.
	    """
	    rating_cols = [
	        f"rating_{name}__{prompt}"
	        for name in models
	        for prompt in ("prompt1", "prompt2")
	    ]
	    for label, record in df.iterrows():
	        if any(record[col] == "" for col in rating_cols):
	            return label
	    return 0


	def get_perm(idx: int) -> List[str]:
	    """Return the shuffled model order for row ``idx``, creating it on first use.

	    The order is stored in the ``PERM_COL`` cell as model names joined by
	    ``|``. When the cell is empty a fresh random permutation of ``models`` is
	    generated, written to the cell and the whole table is saved to
	    ``csv_path`` so the same order is shown again on resume.

	    Args:
	        idx: Row label of the example.

	    Returns:
	        The model names in the order they are displayed for this row.
	    """
	    cell = str(df.at[idx, PERM_COL])
	    if not cell:
	        seq = models.copy()
	        random.shuffle(seq)
	        # Plain "|" separator: a backslash-escaped pipe is not a valid string
	        # escape and would put a literal backslash into the stored value.
	        cell = "|".join(seq)
	        df.at[idx, PERM_COL] = cell
	        df.to_csv(csv_path, index=False)
	    return cell.split("|")


	def build_row(idx: int):
	    """Return a flat, fixed-length tuple of widget values for example ``idx``.

	    Layout of the returned tuple:
	        idx, header text, content paragraph,
	        MAX_MODELS*2 output texts, MAX_MODELS*2 ratings, MAX_MODELS comments,
	        back-button update, next-button update.

	    Models are laid out in the row's stored permutation; unused slots are
	    padded with ``""`` (texts, comments) or ``None`` (ratings). The stored
	    placeholder ``"no comment"`` is shown as an empty comment box.

	    Side effect: resets the global ``current_start`` timestamp used to
	    measure annotation time.
	    """
	    global current_start
	    row = df.loc[idx]
	    order = get_perm(idx)

	    outs, rates, comms = [], [], []
	    for m in order:
	        outs.append(row[f"{m}{PROMPT1_SUFFIX}"])
	        outs.append(row[f"{m}{PROMPT2_SUFFIX}"])
	        # "" (not yet rated) becomes None so the radio shows no selection.
	        rates.append(row[f"rating_{m}__prompt1"] or None)
	        rates.append(row[f"rating_{m}__prompt2"] or None)
	        val = row[f"{COMMENT_PREFIX}{m}"]
	        comms.append("" if val == "no comment" else val)

	    # pad up to MAX_MODELS
	    outs += [""] * (MAX_MODELS * 2 - len(outs))
	    rates += [None] * (MAX_MODELS * 2 - len(rates))
	    comms += [""] * (MAX_MODELS - len(comms))

	    # "Next" is only enabled once every active rating slot holds a valid value.
	    ready = all(r in RATING_OPTS for r in rates[: 2 * len(models)])
	    current_start = time.time()

	    header = f"Example {idx+1}/{TOTAL}"
	    return (
	        idx, header, row[CONTENT_COL],
	        # All three groups are unpacked so the tuple is flat: callers slice it
	        # by position and map one value to one output component.
	        *outs, *rates, *comms,
	        gr.update(visible=True),                      # back_btn update
	        gr.update(visible=True, interactive=ready),   # next_btn update
	    )


	def save_row(idx: int, ratings: List[str], comments: List[str]):
	    """Write the ratings and comments of example ``idx`` to the table and CSV.

	    Nothing is saved unless each of the first ``2 * len(models)`` ratings is
	    one of ``RATING_OPTS``. Ratings and comments arrive in display order, so
	    they are mapped back to models through the row's stored permutation.
	    Blank comments are stored as ``"no comment"``. The annotator name and
	    the seconds elapsed since the row was shown are recorded as well.
	    """
	    required = ratings[: 2 * len(models)]
	    if any(value not in RATING_OPTS for value in required):
	        return

	    if current_start:
	        elapsed = time.time() - current_start
	    else:
	        elapsed = 0.0

	    shown_order = get_perm(idx)

	    # Two consecutive ratings (prompt1, prompt2) per displayed model.
	    for slot, name in enumerate(shown_order):
	        df.at[idx, f"rating_{name}__prompt1"] = ratings[2 * slot]
	        df.at[idx, f"rating_{name}__prompt2"] = ratings[2 * slot + 1]

	    # One comment per displayed model.
	    for name, text in zip(shown_order, comments):
	        stripped = (text or "").strip()
	        if not stripped:
	            stripped = "no comment"
	        df.at[idx, f"{COMMENT_PREFIX}{name}"] = stripped

	    df.at[idx, "annotator"] = annotator
	    df.at[idx, "annotation_time"] = float(elapsed)
	    df.to_csv(csv_path, index=False)


	def _writable_dir() -> str:
	    """Return the first writable directory among ``/data`` and ``/tmp``.

	    Each candidate is created if necessary and probed by writing a small
	    ``.touch`` file, which is removed again afterwards.

	    Returns:
	        ``"/data"`` if it is writable, otherwise ``"/tmp"``.

	    Raises:
	        PermissionError: if neither directory can be written to.
	    """
	    for d in ("/data", "/tmp"):
	        probe = os.path.join(d, ".touch")
	        try:
	            os.makedirs(d, exist_ok=True)
	            with open(probe, "w"):
	                pass
	        except OSError:
	            # Covers PermissionError as well as read-only filesystems and a
	            # candidate path that exists but is not a directory.
	            continue
	        try:
	            os.remove(probe)
	        except OSError:
	            # Best-effort cleanup: the directory is already proven writable,
	            # and a concurrent probe may have removed the file first.
	            pass
	        return d
	    raise PermissionError("No writable directory found.")


	# ─── GRADIO UI ────────────────────────────────────────────────────────────
	with gr.Blocks(title="Paragraph Annotation Tool") as demo:
	    # shared state
	    idx_state = gr.State(0)      # current example index
	    nmodels_state = gr.State(0)  # how many model slots are active

	    gr.Markdown("## Paragraph Annotation Tool")

	    with gr.Row():
	        upload_box = gr.File(label="Upload / Resume CSV", file_types=[".csv"])
	        annot_box = gr.Textbox(label="Annotator name")
	        start_btn = gr.Button("Start / Resume")

	    annotator_label = gr.Markdown(visible=False)

	    annotation_area = gr.Column(visible=False)
	    with annotation_area:
	        idx_box = gr.Number(label="Index", interactive=False)
	        hdr_box = gr.Markdown()
	        para_box = gr.Textbox(lines=6, interactive=False,
	                              label="Content Paragraph")

	        # Pre-allocate up to MAX_MODELS slots; unused ones are hidden on start.
	        out_boxes, radio_widgets, comment_boxes = [], [], []
	        for _ in range(MAX_MODELS):
	            with gr.Row():
	                # prompts + ratings
	                with gr.Column(scale=2):
	                    out1 = gr.Textbox(lines=6, interactive=False)
	                    rad1 = gr.Radio(RATING_OPTS, label="Rating (P1)", value=None)
	                with gr.Column(scale=2):
	                    out2 = gr.Textbox(lines=6, interactive=False)
	                    rad2 = gr.Radio(RATING_OPTS, label="Rating (P2)", value=None)
	                # one comment textbox per model
	                with gr.Column(scale=1):
	                    com = gr.Textbox(lines=2, label="Comment", placeholder="Optional…")
	            out_boxes.extend((out1, out2))
	            radio_widgets.extend((rad1, rad2))
	            comment_boxes.append(com)

	    back_btn = gr.Button("⟵ Back", visible=False)
	    next_btn = gr.Button("Save & Next ⟶", visible=False)
	    download_btn = gr.Button("💾 Download CSV", visible=False)

	    # Enable NEXT when visible radios are filled (comments are optional)
	    def toggle_next(model_cnt: int, *vals):
	        """Make the Next button clickable once all active radios have a rating."""
	        needed = vals[: model_cnt * 2]  # only the radios of active model slots
	        return gr.update(interactive=all(v in RATING_OPTS for v in needed))

	    for r in radio_widgets:
	        r.change(toggle_next,
	                 inputs=[nmodels_state] + radio_widgets,
	                 outputs=next_btn)

	    def _flat_row(idx: int) -> list:
	        """Return ``build_row(idx)`` as one flat list of per-component values.

	        Any plain ``list`` inside the result is expanded in place, so the
	        positional layout below holds whether the text/rating groups arrive
	        inline or as nested lists.
	        """
	        flat = []
	        for item in build_row(idx):
	            if isinstance(item, list):
	                flat.extend(item)
	            else:
	                flat.append(item)
	        return flat

	    # ── navigation callbacks ──────────────────────────────────────────────
	    def goto(step: int):
	        """Build the click handler that moves ``step`` rows (-1 back, +1 next)."""
	        def _fn(idx: int, model_cnt: int, *vals):
	            """Handle Back / Next logic."""
	            # structure of vals:
	            #   radios (MAX_MODELS*2) + comments (MAX_MODELS) + next_btn
	            radio_count = MAX_MODELS * 2
	            ratings = list(vals[: model_cnt * 2])
	            comments = list(vals[radio_count: radio_count + model_cnt])

	            # save current row unless we attempted to go back without
	            # finishing ratings
	            if step != -1 or all(r in RATING_OPTS for r in ratings):
	                save_row(idx, ratings, comments)
	            new_idx = max(0, min(idx + step, TOTAL - 1))
	            flat = _flat_row(new_idx)
	            # The index is emitted twice: once for idx_state and once for the
	            # read-only idx_box so the displayed index follows navigation.
	            return (flat[0], flat[0], *flat[1:])
	        return _fn

	    nav_inputs = ([idx_state, nmodels_state]
	                  + radio_widgets + comment_boxes + [next_btn])
	    # Every component is listed individually: one returned value per output.
	    nav_outputs = [idx_state, idx_box, hdr_box, para_box,
	                   *out_boxes, *radio_widgets, *comment_boxes,
	                   back_btn, next_btn]

	    back_btn.click(goto(-1), inputs=nav_inputs, outputs=nav_outputs)
	    next_btn.click(goto(1), inputs=nav_inputs, outputs=nav_outputs)

	    # CSV download
	    def make_download():
	        """Write the current table to a fresh file and return its path."""
	        if df is None:
	            raise gr.Error("No CSV loaded yet.")
	        tmp = os.path.join(_writable_dir(),
	                           f"annotations_{uuid.uuid4().hex}.csv")
	        df.to_csv(tmp, index=False)
	        return tmp
	    download_btn.click(make_download, outputs=gr.File())

	    # ── Start / Resume ────────────────────────────────────────────────────
	    def start_app(csv_file, name):
	        """Load the uploaded CSV and show the first incomplete example.

	        Raises:
	            gr.Error: if no file or no annotator name was given, or the CSV
	                is rejected by ``load_csv``.
	        """
	        global annotator
	        clean_name = (name or "").strip()
	        if csv_file is None or not clean_name:
	            raise gr.Error("Please upload a CSV and enter your name.")

	        # The upload may be an object exposing ``.name`` or a plain path string.
	        src_path = getattr(csv_file, "name", csv_file)
	        new_path = os.path.join(_writable_dir(), f"{uuid.uuid4().hex}.csv")
	        shutil.copy(src_path, new_path)
	        try:
	            load_csv(new_path)
	        except ValueError as err:
	            # Show the validation message in the UI instead of a generic error.
	            raise gr.Error(str(err)) from err
	        annotator = clean_name

	        # visibility flags – one boolean per model slot
	        vis_flags = [i < len(models) for i in range(MAX_MODELS)]

	        # build first row values
	        row_vals = _flat_row(first_incomplete())
	        idx_val, hdr_val, para_val = row_vals[:3]
	        outs = row_vals[3: 3 + MAX_MODELS * 2]
	        rates = row_vals[3 + MAX_MODELS * 2: 3 + MAX_MODELS * 4]
	        comms = row_vals[3 + MAX_MODELS * 4: 3 + MAX_MODELS * 5]
	        back_update, next_update = row_vals[-2:]

	        # updates for textboxes, radios, comments (two widgets per model slot)
	        out_updates = [
	            gr.update(value=outs[i], visible=vis_flags[i // 2])
	            for i in range(MAX_MODELS * 2)
	        ]
	        radio_updates = [
	            gr.update(value=rates[i], visible=vis_flags[i // 2])
	            for i in range(MAX_MODELS * 2)
	        ]
	        comment_updates = [
	            gr.update(value=comms[i], visible=vis_flags[i])
	            for i in range(MAX_MODELS)
	        ]

	        return (
	            idx_val,                      # idx_state
	            len(models),                  # nmodels_state
	            gr.update(value=idx_val),     # idx_box
	            gr.update(value=hdr_val),     # hdr_box
	            gr.update(value=para_val),    # para_box
	            *out_updates,
	            *radio_updates,
	            *comment_updates,
	            back_update, next_update,     # nav buttons
	            gr.update(visible=True,
	                      value=f"Annotator: {annotator}"),  # annotator_label
	            gr.update(visible=True),      # download_btn
	            gr.update(visible=True),      # annotation_area
	        )

	    start_btn.click(
	        start_app,
	        inputs=[upload_box, annot_box],
	        outputs=[
	            idx_state, nmodels_state,
	            idx_box, hdr_box, para_box,
	            *out_boxes, *radio_widgets, *comment_boxes,
	            back_btn, next_btn,
	            annotator_label,
	            download_btn,
	            annotation_area,
	        ],
	    )

	# ─── RUN ───────────────────────────────────────────────────────────────────
	if __name__ == "__main__":
	    # Turn on the request queue, then start the server.
	    # keep share=False on HF Spaces
	    demo.queue().launch()