diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..ca35f02 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +__pycache__ +.venv +*.pyc +*.pyo +dist +build diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..266a116 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.env +.poller_seen.json \ No newline at end of file diff --git a/BOT_README.md b/BOT_README.md new file mode 100644 index 0000000..9b7defa --- /dev/null +++ b/BOT_README.md @@ -0,0 +1,40 @@ +Gitea Bot +---------- + +This repository contains a Python-based Gitea bot that listens for pull request events and posts an automated review when the bot account is requested as a reviewer. The bot uses a configurable Google AI Studio / Gemini REST endpoint to generate review text. + +Files added: +- [gitea_bot/main.py](gitea_bot/main.py#L1) - FastAPI webhook server +- [gitea_bot/gitea_client.py](gitea_bot/gitea_client.py#L1) - minimal Gitea API helper +- [gitea_bot/gemini_client.py](gitea_bot/gemini_client.py#L1) - wrapper for Google AI Studio REST endpoint +- [Dockerfile](Dockerfile) - container image +- [requirements.txt](requirements.txt) - Python deps + +Quick setup + +1. Build the Docker image: + +```bash +docker build -t gitea-bot:latest . +``` + +2. Run the container (example): + +```bash +docker run -e GITEA_API_URL="https://gitea.example.com/api/v1" \ + -e GITEA_TOKEN="${GITEA_TOKEN}" \ + -e BOT_USERNAME="your-bot-username" \ + -e GOOGLE_AI_ENDPOINT="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent" \ + -e GOOGLE_API_KEY="YOUR_KEY" \ + -p 8000:8000 gitea-bot:latest +``` + +3. Configure a webhook in your Gitea repository pointing to `http://<host>:8000/webhook` (replace `<host>` with the address of the machine running the container) and enable the `pull_request` event. When you request a review from the bot account, the service will fetch the PR diff and post a review comment. + +Notes & configuration +- Set `GITEA_API_URL` to your Gitea API base (usually `https://gitea.example.com/api/v1`). 
+- The bot posts a single comment on the PR; for per-line review comments the Gitea API endpoint may differ and needs adjustment in `gitea_client.py`. +- Configure `GOOGLE_AI_ENDPOINT` and `GOOGLE_API_KEY` to point to your Generative AI Studio model endpoint. + +Security +- Keep `GITEA_TOKEN` and `GOOGLE_API_KEY` secret; prefer injecting them via environment variables or a secret manager. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..cff5d63 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11-slim +WORKDIR /app + +# Install runtime dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY gitea_bot ./gitea_bot + +ENV PYTHONUNBUFFERED=1 +EXPOSE 8000 +CMD ["uvicorn", "gitea_bot.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 7be3ab8..c16772e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ -# KARL +# KARL der Computer -Karl der Computer is a AI agent to review PRs. \ No newline at end of file +Karl is an AI agent to review PRs. \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..40a8923 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,24 @@ +version: '3.8' +services: + gitea-bot: + build: . + image: gitea-bot:latest + env_file: + - .env + ports: + - "8000:8000" + restart: unless-stopped + healthcheck: + test: [ "CMD-SHELL", "curl -f http://localhost:8000/ || exit 1" ] + interval: 30s + timeout: 10s + retries: 3 + poller: + build: . 
+ image: gitea-bot:latest + env_file: + - .env + command: [ "python", "-u", "-m", "gitea_bot.poller" ] + restart: unless-stopped + depends_on: + - gitea-bot diff --git a/gitea_bot/__init__.py b/gitea_bot/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gitea_bot/__pycache__/gemini_client.cpython-313.pyc b/gitea_bot/__pycache__/gemini_client.cpython-313.pyc new file mode 100644 index 0000000..5848ba0 Binary files /dev/null and b/gitea_bot/__pycache__/gemini_client.cpython-313.pyc differ diff --git a/gitea_bot/__pycache__/gitea_client.cpython-313.pyc b/gitea_bot/__pycache__/gitea_client.cpython-313.pyc new file mode 100644 index 0000000..6c878a6 Binary files /dev/null and b/gitea_bot/__pycache__/gitea_client.cpython-313.pyc differ diff --git a/gitea_bot/__pycache__/main.cpython-313.pyc b/gitea_bot/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..a5cafcb Binary files /dev/null and b/gitea_bot/__pycache__/main.cpython-313.pyc differ diff --git a/gitea_bot/__pycache__/poller.cpython-313.pyc b/gitea_bot/__pycache__/poller.cpython-313.pyc new file mode 100644 index 0000000..c83a5bf Binary files /dev/null and b/gitea_bot/__pycache__/poller.cpython-313.pyc differ diff --git a/gitea_bot/__pycache__/server.cpython-312.pyc b/gitea_bot/__pycache__/server.cpython-312.pyc new file mode 100644 index 0000000..0faf881 Binary files /dev/null and b/gitea_bot/__pycache__/server.cpython-312.pyc differ diff --git a/gitea_bot/__pycache__/server.cpython-313.pyc b/gitea_bot/__pycache__/server.cpython-313.pyc new file mode 100644 index 0000000..4340f3a Binary files /dev/null and b/gitea_bot/__pycache__/server.cpython-313.pyc differ diff --git a/gitea_bot/gemini_client.py b/gitea_bot/gemini_client.py new file mode 100644 index 0000000..19d9490 --- /dev/null +++ b/gitea_bot/gemini_client.py @@ -0,0 +1,21 @@ +import os +import google.genai as genai + + +class GeminiClient: + def __init__(self): + self.api_key = os.getenv("GOOGLE_API_KEY") + if not 
class GiteaClient:
    """Minimal helper around the parts of the Gitea REST API the bot uses.

    Every request sends the token in an ``Authorization: token ...`` header
    and uses a 30 second timeout. List endpoints are read page by page: while
    a response advertises a ``rel="next"`` link, the same URL is requested
    again with an incremented ``page`` query parameter, so results beyond the
    first page are not silently dropped.
    """

    def __init__(self, api_url: str, token: str):
        """Store the API base URL (trailing slashes removed) and the token."""
        self.api_url = api_url.rstrip("/")
        self.token = token

    def _headers(self):
        """Return the default headers (token auth + JSON content type)."""
        return {"Authorization": f"token {self.token}", "Content-Type": "application/json"}

    def _get(self, url: str, page: int = None):
        """GET ``url`` with the default headers, optionally for a given page."""
        params = {"page": page} if page else None
        return requests.get(url, headers=self._headers(), params=params, timeout=30)

    def _list_all(self, url: str, first=None) -> List[dict]:
        """Collect the JSON items of every page of a list endpoint.

        Args:
            url: Endpoint URL (may already contain a query string).
            first: Optional already-fetched response for page 1, so callers
                that inspected its status code do not fetch it twice.

        Raises:
            requests.HTTPError: If any page answers with an error status.
        """
        response = first if first is not None else self._get(url)
        items: List[dict] = []
        page = 1
        while True:
            response.raise_for_status()
            batch = response.json()
            items.extend(batch)
            # The page number is built locally instead of following the
            # advertised link URL, so a server announcing a different host
            # (e.g. behind a proxy) cannot redirect the requests. An empty
            # page also ends the loop, which guarantees termination.
            if not batch or "next" not in response.links:
                return items
            page += 1
            response = self._get(url, page=page)

    def available_repositories(self) -> Iterator[tuple[str, str]]:
        """Yield ``(owner, name)`` for every repository visible to the token.

        Entries without an owner login or a name are skipped with a warning.
        """
        for repo in self._list_all(f"{self.api_url}/user/repos"):
            # ``owner`` may be present but null; ``or {}`` avoids calling
            # ``.get`` on None.
            owner = (repo.get("owner") or {}).get("login")
            name = repo.get("name")
            if owner and name:
                yield owner, name
            else:
                print(f"Warning: Skipping repo with missing owner or name: {repo}")

    def list_pull_request_files(self, owner: str, repo: str, pr_number: int) -> List[dict]:
        """List the changed files of a pull request.

        Returns:
            The file entries from ``/repos/{owner}/{repo}/pulls/{index}/files``
            (all pages), or an empty list when the server answers with a
            non-error status other than 200.

        Raises:
            requests.HTTPError: On a 4xx/5xx response.
        """
        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}/files"
        r = self._get(url)
        if r.status_code == 200:
            return self._list_all(url, first=r)
        r.raise_for_status()
        # Non-200 but not an error: return an explicit empty list rather than
        # falling off the end of the function and returning None.
        return []

    def get_pull_request_diff(self, owner: str, repo: str, pr_number: int) -> str:
        """Fetch the unified diff text of a pull request.

        Raises:
            requests.HTTPError: On a 4xx/5xx response.
        """
        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}.diff"
        # The response is plain text, so no JSON Content-Type header is sent.
        headers = {"Authorization": f"token {self.token}"}
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        return r.text

    def create_issue_comment(self, owner: str, repo: str, issue_index: int, body: str) -> dict:
        """Post a comment on an issue or pull request and return the API response."""
        url = f"{self.api_url}/repos/{owner}/{repo}/issues/{issue_index}/comments"
        r = requests.post(url, headers=self._headers(), json={"body": body}, timeout=30)
        r.raise_for_status()
        return r.json()

    def list_open_pull_requests(self, owner: str, repo: str) -> List[dict]:
        """List all open pull requests of a repository (every page)."""
        return self._list_all(f"{self.api_url}/repos/{owner}/{repo}/pulls?state=open")

    def list_repos_for_owner(self, owner: str) -> List[dict]:
        """List repositories of an owner, trying the org endpoint first.

        If ``/orgs/{owner}/repos`` does not answer 200, the owner is treated
        as a user and ``/users/{owner}/repos`` is queried instead.

        Raises:
            requests.HTTPError: If the user endpoint answers with an error.
        """
        url_org = f"{self.api_url}/orgs/{owner}/repos"
        r = self._get(url_org)
        if r.status_code == 200:
            return self._list_all(url_org, first=r)
        return self._list_all(f"{self.api_url}/users/{owner}/repos")

    def create_pull_request_review(self, owner: str, repo: str, pr_number: int, body: str, comments: List[dict] = None) -> dict:
        """Create a PR review with optional line-specific comments.

        Args:
            owner: Repository owner
            repo: Repository name
            pr_number: PR number/index
            body: General review comment
            comments: List of line comments. Each comment dict should have:
                - path: file path
                - new_position: line number in new version
                - body: comment text

        Returns:
            The decoded JSON response of the created review.

        Raises:
            requests.HTTPError: On a 4xx/5xx response.
        """
        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}/reviews"
        payload = {
            "body": body,
            "event": "COMMENT",
        }
        if comments:
            payload["comments"] = comments
        r = requests.post(url, headers=self._headers(), json=payload, timeout=30)
        r.raise_for_status()
        return r.json()
def load_seen(path: Optional[Path] = None) -> set:
    """Load the set of already-reviewed PR keys from the state file.

    Args:
        path: State file to read. Defaults to the module-level ``SEEN_PATH``.

    Returns:
        A set of ``(key, number)`` tuples. A missing file yields an empty set.
        An unreadable or malformed file also yields an empty set, but a
        warning is printed so lost state does not go unnoticed.
    """
    path = SEEN_PATH if path is None else path
    try:
        with open(path, "r", encoding="utf-8") as f:
            entries = json.load(f)
    except FileNotFoundError:
        return set()
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"warning: ignoring unreadable seen-state file {path}: {e}")
        return set()

    if not isinstance(entries, list):
        print(f"warning: ignoring malformed seen-state file {path}")
        return set()

    seen = set()
    for entry in entries:
        # Only two-item lists are valid; anything else (strings, dicts, ...)
        # would otherwise be turned into a meaningless tuple.
        if isinstance(entry, list) and len(entry) == 2:
            try:
                seen.add(tuple(entry))
            except TypeError:
                # Unhashable element inside the entry; skip it.
                continue
    return seen


def save_seen(seen: set, path: Optional[Path] = None):
    """Persist the set of reviewed PR keys as a JSON list of lists.

    The data is written to a sibling ``.tmp`` file first and then moved over
    the target with ``os.replace``, so an interrupted write cannot leave a
    truncated state file behind. Entries are written in a stable order.

    Args:
        seen: Set of tuples to store.
        path: State file to write. Defaults to the module-level ``SEEN_PATH``.
    """
    path = Path(SEEN_PATH if path is None else path)
    # Sort on the string form so mixed value types cannot raise TypeError.
    entries = sorted((list(x) for x in seen), key=lambda e: [str(p) for p in e])
    tmp_path = path.with_name(path.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(entries, f)
    os.replace(tmp_path, path)
def extract_json_object(text: str) -> Optional[dict]:
    """Extract a JSON object from model output, including fenced JSON blocks.

    The text is tried in this order:

    1. If it is wrapped in a Markdown code fence, the fence lines (and a
       leading ``json`` language tag) are removed.
    2. The remaining text is parsed as a whole JSON document.
    3. Otherwise every ``{`` is tried as the start of an embedded object and
       the first one that decodes is returned. Unlike slicing from the first
       ``{`` to the last ``}``, this still works when prose containing braces
       surrounds the object.

    Args:
        text: Raw model response; may be empty or None.

    Returns:
        The decoded dict, or None when no JSON object can be found (this
        includes input that is valid JSON but not an object, e.g. a list).
    """
    if not text:
        return None

    raw = text.strip()
    if raw.startswith("```"):
        lines = raw.splitlines()
        if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].strip() == "```":
            raw = "\n".join(lines[1:-1]).strip()
        if raw.startswith("json"):
            raw = raw[4:].strip()

    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        pass
    else:
        return data if isinstance(data, dict) else None

    decoder = json.JSONDecoder()
    start = raw.find("{")
    while start != -1:
        try:
            # raw_decode parses one document from the start of the string and
            # ignores whatever follows it.
            data, _ = decoder.raw_decode(raw[start:])
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(data, dict):
                return data
        start = raw.find("{", start + 1)
    return None
def split_unified_diff_by_file(unified_diff: str) -> dict:
    """Split a PR unified diff into per-file diff chunks keyed by new path.

    A chunk starts at each ``diff --git`` line. Its key is taken from the
    ``b/`` path of that header and is overridden by the ``+++`` header line
    when one is present and is not ``/dev/null``. Sections without a usable
    ``+++`` line (deleted files, or sections that have no ``+++`` line at
    all) therefore keep the path from the ``diff --git`` header instead of
    being dropped.

    Args:
        unified_diff: Full diff text, possibly empty.

    Returns:
        Dict mapping file path to that file's diff text (stripped). Text
        before the first ``diff --git`` line is ignored.
    """
    file_diffs = {}
    current_lines: Optional[List[str]] = None  # None until the first header
    current_path: Optional[str] = None
    in_hunk = False

    def flush_current() -> None:
        if current_path and current_lines:
            file_diffs[current_path] = "\n".join(current_lines).strip()

    for line in unified_diff.splitlines():
        if line.startswith("diff --git "):
            flush_current()
            current_lines = [line]
            in_hunk = False
            # Header path is the fallback for sections without a "+++ b/..."
            # line or with "+++ /dev/null".
            match = re.match(r"diff --git a/(.+?) b/(.+)", line)
            current_path = match.group(2) if match else None
            continue

        if current_lines is None:
            # Preamble before the first file section.
            continue

        current_lines.append(line)

        if line.startswith("@@"):
            in_hunk = True
            continue

        # Only file-header lines may set the path. Inside a hunk, an added
        # line whose content begins with "++ " also starts with "+++ " and
        # must not be mistaken for a header.
        # Example header: +++ b/src/main.cpp
        if not in_hunk and line.startswith("+++ "):
            raw_path = line[4:].strip()
            if raw_path != "/dev/null":
                current_path = raw_path[2:] if raw_path.startswith("b/") else raw_path

    flush_current()
    return file_diffs
+ fallback_patches = {} + if files and all(not (f.get("patch") or f.get("diff") or "").strip() for f in files): + try: + unified_diff = gitea.get_pull_request_diff(owner, repo, pr_number) + fallback_patches = split_unified_diff_by_file(unified_diff) + print( + f"Loaded fallback unified diff for {owner}/{repo}#{pr_number} " + f"({len(fallback_patches)} file patches)" + ) + except Exception as e: + print(f"failed to load fallback diff for {owner}/{repo}#{pr_number}: {e}") + + # Analyze each file individually based on its diff. + review_comments: List[dict] = [] + file_summaries: List[str] = [] + + for file_dict in files: + filename = file_dict.get("filename") or file_dict.get("path") + if not filename: + continue + + patch = file_dict.get("patch") or file_dict.get("diff") or "" + if not patch.strip() and fallback_patches: + patch = fallback_patches.get(filename, "") + + if not patch.strip(): + file_summaries.append(f"**{filename}**: No textual diff available.") + continue + + file_for_prompt = dict(file_dict) + file_for_prompt["patch"] = patch + + print(f"Analyzing {filename} for {owner}/{repo}#{pr_number}") + prompt = build_prompt_from_file(file_for_prompt) + + try: + ai_response = gemini.generate_review(prompt) + parsed_review = parse_structured_review(ai_response) + file_summaries.append(f"**{filename}**: {parsed_review['summary']}") + + for finding in parsed_review["findings"]: + severity = finding["severity"].upper() + body = f"[{severity}] {finding['comment']}" + review_comments.append({ + "path": filename, + "new_position": finding["diff_position"], + "body": body, + }) + except Exception as e: + print(f"failed to generate review for {filename}: {e}") + file_summaries.append(f"**{filename}**: Error analyzing file - {e}") + + # Create one PR review containing summary + line-anchored comments. 
def run():
    """Poll all accessible repositories forever and review assigned PRs.

    Each cycle lists the repositories visible to the token, then the open
    pull requests of each one. A PR is handed to ``handle_assignment`` when
    the bot user is among its requested reviewers and its key has not been
    recorded yet; the key is recorded (and persisted) only after a successful
    review, so failed reviews are retried on the next cycle. The loop sleeps
    ``POLL_INTERVAL`` seconds between cycles and stops on Ctrl+C.
    """
    seen = load_seen()
    print("Starting poller; checking repos...")
    try:
        while True:
            try:
                repos = list(gitea.available_repositories())
            except requests.exceptions.RequestException as e:
                # A transient network/API failure must not terminate the
                # poller; wait one interval and try again.
                print(f"failed to list repositories: {e}")
                time.sleep(POLL_INTERVAL)
                continue

            print(f"Found {len(repos)} accessible repositories")
            for owner, repo in repos:
                try:
                    prs = gitea.list_open_pull_requests(owner, repo)
                except requests.exceptions.HTTPError as e:
                    # e.response can be None, so read the status defensively.
                    status = getattr(e.response, "status_code", None)
                    if status == 404:
                        # Repo exists but is not accessible (permission or deleted)
                        continue
                    print(f"failed to list PRs for {owner}/{repo}: {e}")
                    continue
                except Exception as e:
                    print(f"failed to list PRs for {owner}/{repo}: {e}")
                    continue

                for pr in prs:
                    key = (f"{owner}/{repo}", pr.get("number"))
                    reviewers = [
                        r.get("login") or r.get("username")
                        for r in (pr.get("requested_reviewers") or [])
                    ]
                    if BOT in reviewers and key not in seen:
                        print(f"Detected assignment: {key}")
                        ok = handle_assignment(owner, repo, pr)
                        if ok:
                            seen.add(key)
                            save_seen(seen)
            time.sleep(POLL_INTERVAL)
    except KeyboardInterrupt:
        print("Poller stopped")


if __name__ == "__main__":
    run()