Initial commit

Creates an agent bot that reviews code in the repositories it has access to when it is added as a code reviewer. Co-authored-by: Copilot <copilot@github.com>
2026-05-02 10:44:51 +02:00
parent 195ee229b1
commit 3b4dcabe66
17 changed files with 513 additions and 2 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,6 @@
+__pycache__
+.venv
+*.pyc
+*.pyo
+dist
+build
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.env
+.poller_seen.json
--- a/BOT_README.md
+++ b/BOT_README.md
@@ -0,0 +1,40 @@
+Gitea Bot
+----------
+
+This repository contains a Python-based Gitea bot that listens for pull request events and posts an automated review when the bot account is requested as a reviewer. The bot uses a configurable Google AI Studio / Gemini REST endpoint to generate review text.
+
+Files added:
+- [gitea_bot/main.py](gitea_bot/main.py#L1) - FastAPI webhook server
+- [gitea_bot/gitea_client.py](gitea_bot/gitea_client.py#L1) - minimal Gitea API helper
+- [gitea_bot/gemini_client.py](gitea_bot/gemini_client.py#L1) - wrapper for Google AI Studio REST endpoint
+- [Dockerfile](Dockerfile) - container image
+- [requirements.txt](requirements.txt) - Python deps
+
+Quick setup
+
+1. Build the Docker image:
+
+```bash
+docker build -t gitea-bot:latest .
+```
+
+2. Run the container (example):
+
+```bash
+docker run -e GITEA_API_URL="https://gitea.example.com/api/v1" \
+  -e GITEA_TOKEN="${GITEA_TOKEN}" \
+  -e BOT_USERNAME="your-bot-username" \
+  -e GOOGLE_AI_ENDPOINT="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent" \
+  -e GOOGLE_API_KEY="YOUR_KEY" \
+  -p 8000:8000 gitea-bot:latest
+```
+
+3. Configure a webhook in your Gitea repository pointing to `http://<host>:8000/webhook` and enable the `pull_request` event. When you request a review from the bot account, the service fetches the PR diff and posts a review comment.
+
+Notes & configuration
+- Set `GITEA_API_URL` to your Gitea API base (usually `https://gitea.example.com/api/v1`).
+- The bot posts a single comment on the PR; for per-line review comments the Gitea API endpoint may differ and needs adjustment in `gitea_client.py`.
+- Configure `GOOGLE_AI_ENDPOINT` and `GOOGLE_API_KEY` to point to your Generative AI Studio model endpoint.
+
+Security
+- Keep `GITEA_TOKEN` and `GOOGLE_API_KEY` secret and prefer injecting via environment or secret manager.
--- a/16
+++ b/16
@@ -0,0 +1,16 @@
+FROM python:3.11-slim
+WORKDIR /app
+
+# Install the build toolchain (build-essential); these are build tools rather than runtime dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY gitea_bot ./gitea_bot
+
+ENV PYTHONUNBUFFERED=1
+EXPOSE 8000
+CMD ["uvicorn", "gitea_bot.main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
-# KARL
+# KARL der Computer

-Karl der Computer is a AI agent to review PRs.
+Karl is an AI agent that reviews PRs.
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,24 @@
+version: '3.8'
+services:
+  gitea-bot:
+    build: .
+    image: gitea-bot:latest
+    env_file:
+      - .env
+    ports:
+      - "8000:8000"
+    restart: unless-stopped
+    healthcheck:
+      # The image is built from python:3.11-slim, which does not ship curl,
+      # so probe with the Python standard library instead. urlopen raises
+      # (non-zero exit) on connection errors and on HTTP 4xx/5xx responses,
+      # which mirrors the behavior of `curl -f`.
+      test: [ "CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/', timeout=5)" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+  poller:
+    build: .
+    image: gitea-bot:latest
+    env_file:
+      - .env
+    # Run the poller as a script rather than with `-m gitea_bot.poller`:
+    # poller.py uses top-level imports (`from gitea_client import ...`),
+    # which only resolve when the gitea_bot directory itself is the first
+    # entry of sys.path, i.e. when the file is executed directly.
+    command: [ "python", "-u", "gitea_bot/poller.py" ]
+    restart: unless-stopped
+    depends_on:
+      - gitea-bot
--- a/gitea_bot/init.py
+++ b/gitea_bot/init.py
--- a/gitea_bot/pycache/gemini_client.cpython-313.pyc
+++ b/gitea_bot/pycache/gemini_client.cpython-313.pyc
--- a/gitea_bot/pycache/gitea_client.cpython-313.pyc
+++ b/gitea_bot/pycache/gitea_client.cpython-313.pyc
--- a/gitea_bot/pycache/main.cpython-313.pyc
+++ b/gitea_bot/pycache/main.cpython-313.pyc
--- a/gitea_bot/pycache/poller.cpython-313.pyc
+++ b/gitea_bot/pycache/poller.cpython-313.pyc
--- a/gitea_bot/pycache/server.cpython-312.pyc
+++ b/gitea_bot/pycache/server.cpython-312.pyc
--- a/gitea_bot/pycache/server.cpython-313.pyc
+++ b/gitea_bot/pycache/server.cpython-313.pyc
--- a/gitea_bot/gemini_client.py
+++ b/gitea_bot/gemini_client.py
@@ -0,0 +1,21 @@
+import os
+import google.genai as genai
+
+
+class GeminiClient:
+    """Thin wrapper around the google-genai client used to generate review text.
+
+    Configuration is read from the environment:
+        GOOGLE_API_KEY: required API key.
+        GOOGLE_MODEL: optional model name (defaults to "gemini-2.5-pro").
+    """
+
+    def __init__(self):
+        """Read configuration from the environment and build the client.
+
+        Raises:
+            RuntimeError: if GOOGLE_API_KEY is unset or empty.
+        """
+        self.api_key = os.getenv("GOOGLE_API_KEY")
+        if not self.api_key:
+            raise RuntimeError("GOOGLE_API_KEY must be set")
+
+        # Model name is configurable via env.
+        self.model = os.getenv("GOOGLE_MODEL", "gemini-2.5-pro")
+        self.client = genai.Client(api_key=self.api_key)
+
+    def generate_review(self, prompt: str) -> str:
+        """Send ``prompt`` to the configured model and return the response text.
+
+        Returns:
+            The text of the model response, or an empty string when the
+            response carries no text, so the declared ``str`` return type
+            always holds.
+        """
+        response = self.client.models.generate_content(
+            model=self.model,
+            contents=prompt,
+        )
+        # NOTE(review): ``response.text`` appears to be optional in the SDK
+        # (None when the response has no text part) - normalize to "".
+        return response.text or ""
--- a/gitea_bot/gitea_client.py
+++ b/gitea_bot/gitea_client.py
@@ -0,0 +1,93 @@
+import requests
+from typing import Iterator, List
+
+
+class GiteaClient:
+    """Minimal helper for the subset of the Gitea REST API used by the bot.
+
+    Every method issues a blocking ``requests`` call with a 30 second timeout
+    and, unless noted otherwise, raises ``requests.HTTPError`` (through
+    ``raise_for_status``) on 4xx/5xx responses. Pagination is not handled:
+    each listing method returns only what a single request yields.
+    """
+
+    def __init__(self, api_url: str, token: str):
+        """Store the API base URL (trailing slashes removed) and the token."""
+        self.api_url = api_url.rstrip("/")
+        self.token = token
+
+    def _headers(self):
+        """Return the token-auth headers sent with JSON requests."""
+        return {"Authorization": f"token {self.token}", "Content-Type": "application/json"}
+
+    def available_repositories(self) -> Iterator[tuple[str, str]]:
+        """Yield ``(owner, name)`` for each repository returned by ``/user/repos``.
+
+        Entries lacking an owner login or a name are skipped with a warning.
+        """
+        url = f"{self.api_url}/user/repos"
+        r = requests.get(url, headers=self._headers(), timeout=30)
+        r.raise_for_status()
+
+        for repo in r.json():
+            # ``owner`` may be present but null; ``or {}`` keeps the lookup
+            # from raising AttributeError on None.
+            owner = (repo.get("owner") or {}).get("login")
+            name = repo.get("name")
+            if owner and name:
+                yield owner, name
+            else:
+                print(f"Warning: Skipping repo with missing owner or name: {repo}")
+
+    def list_pull_request_files(self, owner: str, repo: str, pr_number: int) -> List[dict]:
+        """Return the changed-file entries of a pull request.
+
+        Queries ``/repos/{owner}/{repo}/pulls/{index}/files``. A successful
+        response other than 200 yields an empty list, so the declared list
+        return type always holds.
+        """
+        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}/files"
+        r = requests.get(url, headers=self._headers(), timeout=30)
+        r.raise_for_status()
+        # Previously a non-200 success fell through and returned None.
+        return r.json() if r.status_code == 200 else []
+
+    def get_pull_request_diff(self, owner: str, repo: str, pr_number: int) -> str:
+        """Fetch the unified diff text of a pull request (``.../pulls/{n}.diff``)."""
+        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}.diff"
+        # Only the auth header is sent here; the response is plain text.
+        headers = {"Authorization": f"token {self.token}"}
+        r = requests.get(url, headers=headers, timeout=30)
+        r.raise_for_status()
+        return r.text
+
+    def create_issue_comment(self, owner: str, repo: str, issue_index: int, body: str) -> dict:
+        """Post ``body`` as a comment on an issue/PR and return the decoded response."""
+        url = f"{self.api_url}/repos/{owner}/{repo}/issues/{issue_index}/comments"
+        r = requests.post(url, headers=self._headers(), json={"body": body}, timeout=30)
+        r.raise_for_status()
+        return r.json()
+
+    def list_open_pull_requests(self, owner: str, repo: str) -> List[dict]:
+        """List open pull requests for a repository."""
+        url = f"{self.api_url}/repos/{owner}/{repo}/pulls?state=open"
+        r = requests.get(url, headers=self._headers(), timeout=30)
+        r.raise_for_status()
+        return r.json()
+
+    def list_repos_for_owner(self, owner: str) -> List[dict]:
+        """List repositories of ``owner``, which may be an organization or a user.
+
+        The organization endpoint is tried first; any non-200 answer from it
+        is ignored and the user endpoint is queried instead. Only a failure
+        of the user endpoint raises.
+        """
+        url_org = f"{self.api_url}/orgs/{owner}/repos"
+        r = requests.get(url_org, headers=self._headers(), timeout=30)
+        if r.status_code == 200:
+            return r.json()
+        url_user = f"{self.api_url}/users/{owner}/repos"
+        r = requests.get(url_user, headers=self._headers(), timeout=30)
+        r.raise_for_status()
+        return r.json()
+
+    def create_pull_request_review(self, owner: str, repo: str, pr_number: int, body: str, comments: List[dict] = None) -> dict:
+        """Create a PR review with optional line-specific comments.
+
+        The review is always submitted with event ``COMMENT``.
+
+        Args:
+            owner: Repository owner
+            repo: Repository name
+            pr_number: PR number/index
+            body: General review comment
+            comments: List of line comments, or None/empty for none. Each
+                comment dict should have:
+                - path: file path
+                - new_position: line number in new version
+                - body: comment text
+
+        Returns:
+            The decoded JSON response of the review endpoint.
+        """
+        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}/reviews"
+        payload = {
+            "body": body,
+            "event": "COMMENT",
+        }
+        # The key is omitted entirely when there are no line comments.
+        if comments:
+            payload["comments"] = comments
+        r = requests.post(url, headers=self._headers(), json=payload, timeout=30)
+        r.raise_for_status()
+        return r.json()
--- a/gitea_bot/poller.py
+++ b/gitea_bot/poller.py
@@ -0,0 +1,304 @@
+import os
+import time
+import json
+import re
+from pathlib import Path
+from typing import List, Optional
+
+import requests
+from gitea_client import GiteaClient
+from gemini_client import GeminiClient
+from dotenv import load_dotenv
+
+# Project root: the directory that contains the gitea_bot package.
+ROOT = Path(__file__).resolve().parent.parent
+SEEN_PATH = ROOT / ".poller_seen.json"
+
+# Pull variables from the project-root .env file when one is present. This
+# must happen before the os.getenv() reads below.
+env_path = ROOT / ".env"
+if env_path.exists():
+    load_dotenv(dotenv_path=env_path)
+
+# Settings taken from the environment.
+API_URL = os.getenv("GITEA_API_URL")
+TOKEN = os.getenv("GITEA_TOKEN")
+BOT = os.getenv("BOT_USERNAME")
+POLL_INTERVAL = int(os.getenv("POLL_INTERVAL", "60"))
+POLL_OWNER = os.getenv("POLL_OWNER")
+POLL_REPOS = os.getenv("POLL_REPOS")  # comma-separated owner/repo
+
+# Fail fast at import time when a mandatory setting is missing or empty.
+if not all((API_URL, TOKEN, BOT)):
+    raise RuntimeError("GITEA_API_URL, GITEA_TOKEN and BOT_USERNAME must be set for poller")
+
+# Shared clients used by the functions below.
+gitea = GiteaClient(API_URL, TOKEN)
+gemini = GeminiClient()
+
+
+def load_seen() -> set:
+    """Load the set of already-handled PR keys from ``SEEN_PATH``.
+
+    The file holds a JSON list of lists; each inner list is turned back into
+    a tuple so it can live in a set.
+
+    Returns:
+        The stored keys, or an empty set when the file does not exist or
+        cannot be read/decoded (a warning is printed in the latter case).
+    """
+    if not SEEN_PATH.exists():
+        return set()
+    try:
+        with open(SEEN_PATH, "r", encoding="utf-8") as f:
+            return {tuple(entry) for entry in json.load(f)}
+    except Exception as e:
+        # Still best-effort (an unreadable state file must not stop the
+        # poller), but no longer silent: losing the state means every open
+        # assignment is processed again.
+        print(f"Warning: could not load {SEEN_PATH}, starting with empty state: {e}")
+        return set()
+
+
+def save_seen(seen: set):
+    """Persist ``seen`` to ``SEEN_PATH`` as a JSON list of lists.
+
+    The data is written to a sibling temporary file first and then moved
+    over the target with ``os.replace``, so an interruption mid-write cannot
+    leave a truncated state file behind.
+    """
+    tmp_path = SEEN_PATH.with_name(SEEN_PATH.name + ".tmp")
+    with open(tmp_path, "w", encoding="utf-8") as f:
+        json.dump([list(x) for x in seen], f)
+    os.replace(tmp_path, SEEN_PATH)
+
+
+def build_prompt_from_file(file_dict: dict) -> str:
+    """Return the review prompt for one changed file.
+
+    The file name is taken from ``filename`` or ``path`` (falling back to
+    "unknown") and the diff from ``patch`` or ``diff``. Diffs longer than
+    30000 characters are cut and marked as truncated.
+    """
+    name = file_dict.get("filename") or file_dict.get("path") or "unknown"
+    diff_text = file_dict.get("patch") or file_dict.get("diff") or ""
+
+    limit = 30000
+    if len(diff_text) > limit:
+        diff_text = diff_text[:limit] + "\n...TRUNCATED..."
+
+    # One entry per output line; joined with newlines below.
+    sections = [
+        "You are a senior code reviewer. Analyze exactly one file diff and return ONLY JSON.",
+        "You review C++ code with the Qt framework",
+        "Rules:",
+        "1) Only report real issues or actionable improvements.",
+        "2) Use diff positions (line index in the unified diff hunk) for comment anchoring.",
+        "3) Keep each comment short and specific.",
+        "4) If there are no findings, return an empty findings array.",
+        "",
+        "JSON schema:",
+        "{",
+        '  "summary": "short summary",',
+        '  "findings": [',
+        "    {",
+        '      "diff_position": 12,',
+        '      "severity": "high|medium|low",',
+        '      "comment": "text"',
+        "    }",
+        "  ]",
+        "}",
+        "",
+        f"File: {name}",
+        "Unified diff:",
+        f"{diff_text}",
+    ]
+    return "\n".join(sections)
+
+
+def extract_json_object(text: str) -> Optional[dict]:
+    """Pull a JSON object out of model output.
+
+    Handles plain JSON, JSON wrapped in a Markdown code fence, and JSON
+    embedded in surrounding prose (outermost ``{`` ... ``}`` span). Returns
+    None when nothing decodes to a dict.
+    """
+    if not text:
+        return None
+
+    payload = text.strip()
+
+    # Unwrap a fenced block: drop the first and last fence lines and an
+    # optional leading "json" language tag.
+    if payload.startswith("```"):
+        rows = payload.splitlines()
+        is_fenced = len(rows) >= 3 and rows[0].startswith("```") and rows[-1].strip() == "```"
+        if is_fenced:
+            payload = "\n".join(rows[1:-1]).strip()
+            if payload.startswith("json"):
+                payload = payload[4:].strip()
+
+    try:
+        decoded = json.loads(payload)
+    except json.JSONDecodeError:
+        # Second attempt: decode only the span between the first "{" and
+        # the last "}".
+        first = payload.find("{")
+        last = payload.rfind("}")
+        if first == -1 or last <= first:
+            return None
+        try:
+            decoded = json.loads(payload[first:last + 1])
+        except json.JSONDecodeError:
+            return None
+
+    if isinstance(decoded, dict):
+        return decoded
+    return None
+
+
+def parse_structured_review(ai_response: str) -> dict:
+    """Parse model output into a normalized review structure.
+
+    Returns:
+        ``{"summary": str, "findings": list}`` where every finding is a dict
+        with ``diff_position`` (int), ``severity`` (lower-cased string,
+        default "low") and ``comment`` (non-empty string). Findings that are
+        not dicts, lack a usable ``diff_position`` or have an empty comment
+        are dropped. Unparseable output yields the default summary and no
+        findings.
+    """
+    parsed = extract_json_object(ai_response) or {}
+    summary = str(parsed.get("summary") or "No summary provided.").strip()
+    findings_raw = parsed.get("findings") or []
+    findings = []
+
+    if not isinstance(findings_raw, list):
+        return {"summary": summary, "findings": findings}
+
+    for item in findings_raw:
+        if not isinstance(item, dict):
+            continue
+
+        try:
+            diff_position = int(item.get("diff_position"))
+        except (TypeError, ValueError, OverflowError):
+            # OverflowError: json.loads accepts "Infinity", and int(inf)
+            # raises it; such a finding is skipped like any other bad one
+            # instead of aborting the whole file review.
+            continue
+
+        comment = str(item.get("comment") or "").strip()
+        severity = str(item.get("severity") or "low").strip().lower()
+        if not comment:
+            continue
+
+        findings.append(
+            {
+                "diff_position": diff_position,
+                "severity": severity,
+                "comment": comment,
+            }
+        )
+
+    return {"summary": summary, "findings": findings}
+
+
+def split_unified_diff_by_file(unified_diff: str) -> dict:
+    """Split a PR unified diff into per-file diff chunks keyed by file path.
+
+    A new chunk starts at every ``diff --git`` line. The key is the path from
+    the ``+++`` header; when that header is absent or ``/dev/null`` (renames
+    without content change, deleted files) the ``b/`` path of the
+    ``diff --git`` line is used instead. Chunks for which no path can be
+    determined are dropped.
+
+    Hunk bodies are skipped by line count, so an added line whose content
+    starts with ``++ `` (shown as ``+++ ...``) is not mistaken for a header.
+
+    Returns:
+        Mapping of path to the stripped diff text of that file.
+    """
+    header_re = re.compile(r"diff --git a/(.+?) b/(.+)")
+    # "@@ -start[,count] +start[,count] @@"; an omitted count means 1.
+    hunk_re = re.compile(r"@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")
+
+    file_diffs: dict = {}
+    current_lines: List[str] = []
+    current_path: Optional[str] = None
+    # Lines of the current hunk body still expected on the old/new side.
+    old_left = 0
+    new_left = 0
+
+    def flush_current() -> None:
+        if current_path and current_lines:
+            file_diffs[current_path] = "\n".join(current_lines).strip()
+
+    for line in unified_diff.splitlines():
+        if line.startswith("diff --git "):
+            flush_current()
+            current_lines = [line]
+            # Provisional path from the header; a later "+++ b/..." line
+            # overrides it.
+            match = header_re.match(line)
+            current_path = match.group(2) if match else None
+            old_left = new_left = 0
+            continue
+
+        current_lines.append(line)
+
+        if old_left > 0 or new_left > 0:
+            # Inside a hunk body: only count lines, never parse headers.
+            if line.startswith("+"):
+                new_left -= 1
+            elif line.startswith("-"):
+                old_left -= 1
+            elif not line.startswith("\\"):
+                # Context line; "\ No newline at end of file" counts for
+                # neither side.
+                old_left -= 1
+                new_left -= 1
+            continue
+
+        hunk = hunk_re.match(line)
+        if hunk:
+            old_left = 1 if hunk.group(1) is None else int(hunk.group(1))
+            new_left = 1 if hunk.group(2) is None else int(hunk.group(2))
+            continue
+
+        # Example: +++ b/src/main.cpp
+        if line.startswith("+++ "):
+            raw_path = line[4:].strip()
+            if raw_path != "/dev/null":
+                current_path = raw_path[2:] if raw_path.startswith("b/") else raw_path
+
+    flush_current()
+    return file_diffs
+
+
+def handle_assignment(owner: str, repo: str, pr: dict):
+    """Review one pull request file by file and post a single PR review.
+
+    Fetches the PR's changed files, asks the model for a structured review of
+    each file that has a textual diff, and posts one review whose body lists
+    the per-file summaries and whose comments carry the findings.
+
+    Args:
+        owner: Repository owner.
+        repo: Repository name.
+        pr: Pull request dict; its number is read from "number", "index" or
+            "id" (first truthy value).
+
+    Returns:
+        True when the review was posted; False when the file list could not
+        be fetched, was empty, or posting the review failed.
+    """
+    pr_number = pr.get("number") or pr.get("index") or pr.get("id")
+    try:
+        files = gitea.list_pull_request_files(owner, repo, pr_number)
+    except Exception as e:
+        print(f"failed to fetch files for {owner}/{repo}#{pr_number}: {e}")
+        return False
+
+    if not files:
+        print(f"No files found for {owner}/{repo}#{pr_number}")
+        return False
+
+    # Some Gitea setups return filenames but no patch in /pulls/{n}/files.
+    # The fallback is only fetched when NO file entry carries a patch.
+    fallback_patches = {}
+    if files and all(not (f.get("patch") or f.get("diff") or "").strip() for f in files):
+        try:
+            unified_diff = gitea.get_pull_request_diff(owner, repo, pr_number)
+            fallback_patches = split_unified_diff_by_file(unified_diff)
+            print(
+                f"Loaded fallback unified diff for {owner}/{repo}#{pr_number} "
+                f"({len(fallback_patches)} file patches)"
+            )
+        except Exception as e:
+            # Best-effort: without the fallback, files are reported below as
+            # having no textual diff.
+            print(f"failed to load fallback diff for {owner}/{repo}#{pr_number}: {e}")
+
+    # Analyze each file individually based on its diff.
+    review_comments: List[dict] = []
+    file_summaries: List[str] = []
+
+    for file_dict in files:
+        filename = file_dict.get("filename") or file_dict.get("path")
+        if not filename:
+            continue
+
+        patch = file_dict.get("patch") or file_dict.get("diff") or ""
+        if not patch.strip() and fallback_patches:
+            patch = fallback_patches.get(filename, "")
+
+        if not patch.strip():
+            file_summaries.append(f"**{filename}**: No textual diff available.")
+            continue
+
+        # Copy so the entry returned by the API is not mutated.
+        file_for_prompt = dict(file_dict)
+        file_for_prompt["patch"] = patch
+
+        print(f"Analyzing {filename} for {owner}/{repo}#{pr_number}")
+        prompt = build_prompt_from_file(file_for_prompt)
+
+        try:
+            ai_response = gemini.generate_review(prompt)
+            parsed_review = parse_structured_review(ai_response)
+            file_summaries.append(f"**{filename}**: {parsed_review['summary']}")
+
+            for finding in parsed_review["findings"]:
+                severity = finding["severity"].upper()
+                body = f"[{severity}] {finding['comment']}"
+                # NOTE(review): the prompt asks the model for a position
+                # inside the diff, but the value is sent as "new_position",
+                # which GiteaClient.create_pull_request_review documents as a
+                # line number in the new file version - confirm the anchoring.
+                review_comments.append({
+                    "path": filename,
+                    "new_position": finding["diff_position"],
+                    "body": body,
+                })
+        except Exception as e:
+            # A failure for one file is recorded in the summary; the
+            # remaining files are still analyzed.
+            print(f"failed to generate review for {filename}: {e}")
+            file_summaries.append(f"**{filename}**: Error analyzing file - {e}")
+
+    # Create one PR review containing summary + line-anchored comments.
+    review_body = "AI Code Review\n\n" + "\n".join(file_summaries)
+
+    try:
+        gitea.create_pull_request_review(
+            owner, repo, pr_number,
+            body=review_body,
+            comments=review_comments if review_comments else None
+        )
+        print(f"Posted review for {owner}/{repo}#{pr_number} with {len(review_comments)} line comments")
+        return True
+    except Exception as e:
+        print(f"failed to post review for {owner}/{repo}#{pr_number}: {e}")
+        return False
+
+
+def run():
+    """Poll all accessible repositories forever and review assigned PRs.
+
+    Every ``POLL_INTERVAL`` seconds the open pull requests of each accessible
+    repository are listed; a PR whose requested reviewers include ``BOT`` and
+    whose key is not yet in the seen set is handed to ``handle_assignment``.
+    Successfully handled keys are persisted right away. The loop ends on
+    KeyboardInterrupt.
+    """
+    seen = load_seen()
+    print("Starting poller; checking repos...")
+    try:
+        while True:
+            try:
+                repos = list(gitea.available_repositories())
+            except Exception as e:
+                # A failing repository listing (network error, bad response)
+                # must not terminate the poller; retry on the next cycle.
+                print(f"failed to list repositories: {e}")
+                time.sleep(POLL_INTERVAL)
+                continue
+
+            print(f"Found {len(repos)} accessible repositories")
+            for owner, repo in repos:
+                try:
+                    prs = gitea.list_open_pull_requests(owner, repo)
+                except requests.exceptions.HTTPError as e:
+                    # e.response may be None, so read the status defensively.
+                    status = getattr(e.response, "status_code", None)
+                    if status != 404:
+                        print(f"failed to list PRs for {owner}/{repo}: {e}")
+                    # 404: repo exists but is not accessible (permission or
+                    # deleted); skipped without logging.
+                    continue
+                except Exception as e:
+                    print(f"failed to list PRs for {owner}/{repo}: {e}")
+                    continue
+
+                for pr in prs:
+                    key = (f"{owner}/{repo}", pr.get("number"))
+                    reviewers = [r.get("login") or r.get("username") for r in (pr.get("requested_reviewers") or [])]
+                    if BOT in reviewers and key not in seen:
+                        print(f"Detected assignment: {key}")
+                        # Only mark as seen on success so failures are
+                        # retried on the next cycle.
+                        if handle_assignment(owner, repo, pr):
+                            seen.add(key)
+                            save_seen(seen)
+            time.sleep(POLL_INTERVAL)
+    except KeyboardInterrupt:
+        print("Poller stopped")
+
+
+if __name__ == "__main__":
+    run()
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+fastapi==0.95.2
+uvicorn[standard]==0.22.0
+requests==2.31.0
+python-dotenv==1.0.1
+google-genai