Initial commit

Creates an agent bot that reviews code in the repositories it has access to whenever it is added as a code reviewer

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Kalle Bracht
2026-05-02 10:44:51 +02:00
parent 195ee229b1
commit 3b4dcabe66
17 changed files with 513 additions and 2 deletions

6
.dockerignore Normal file
View File

@@ -0,0 +1,6 @@
__pycache__
.venv
*.pyc
*.pyo
dist
build

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
.env
.poller_seen.json

40
BOT_README.md Normal file
View File

@@ -0,0 +1,40 @@
Gitea Bot
----------
This repository contains a Python-based Gitea bot that listens for pull request events and posts an automated review when the bot account is requested as a reviewer. The bot uses a configurable Google AI Studio / Gemini REST endpoint to generate review text.
Files added:
- [gitea_bot/main.py](gitea_bot/main.py#L1) - FastAPI webhook server
- [gitea_bot/gitea_client.py](gitea_bot/gitea_client.py#L1) - minimal Gitea API helper
- [gitea_bot/gemini_client.py](gitea_bot/gemini_client.py#L1) - wrapper for Google AI Studio REST endpoint
- [Dockerfile](Dockerfile) - container image
- [requirements.txt](requirements.txt) - Python deps
Quick setup
1. Build the Docker image:
```bash
docker build -t gitea-bot:latest .
```
2. Run the container (example):
```bash
docker run -e GITEA_API_URL="https://gitea.example.com/api/v1" \
-e GITEA_TOKEN="${GITEA_TOKEN}" \
-e BOT_USERNAME="your-bot-username" \
-e GOOGLE_AI_ENDPOINT="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent" \
-e GOOGLE_API_KEY="YOUR_KEY" \
-p 8000:8000 gitea-bot:latest
```
3. Configure a webhook in your Gitea repository pointing to `http://<host>:8000/webhook` and enable the `pull_request` event. When you request a review from the bot account the service will fetch the PR diff and post a review comment.
Notes & configuration
- Set `GITEA_API_URL` to your Gitea API base (usually `https://gitea.example.com/api/v1`).
- The bot posts a single comment on the PR; for per-line review comments the Gitea API endpoint may differ and needs adjustment in `gitea_client.py`.
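- Set `GOOGLE_API_KEY` to your Google AI Studio / Gemini API key. `gitea_bot/gemini_client.py` additionally reads the optional `GOOGLE_MODEL` variable (default `gemini-2.5-pro`) to select the model; `GOOGLE_AI_ENDPOINT` from the example above is not read by `gemini_client.py`.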
Security
- Keep `GITEA_TOKEN` and `GOOGLE_API_KEY` secret and prefer injecting via environment or secret manager.

16
Dockerfile Normal file
View File

@@ -0,0 +1,16 @@
FROM python:3.11-slim
WORKDIR /app
# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY gitea_bot ./gitea_bot
ENV PYTHONUNBUFFERED=1
EXPOSE 8000
CMD ["uvicorn", "gitea_bot.main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -1,3 +1,3 @@
# KARL
# KARL der Computer
Karl der Computer is a AI agent to review PRs.
Karl is an AI agent that reviews PRs.

24
docker-compose.yml Normal file
View File

@@ -0,0 +1,24 @@
version: '3.8'
services:
  # Webhook server (uvicorn, see the Dockerfile CMD).
  gitea-bot:
    build: .
    image: gitea-bot:latest
    env_file:
      - .env
    ports:
      - "8000:8000"
    restart: unless-stopped
    healthcheck:
      # The image is built from python:3.11-slim and the Dockerfile installs only
      # build-essential, so curl is not available inside the container. Probe with
      # the Python interpreter instead; urlopen raises (non-zero exit) on connection
      # errors and on HTTP error statuses, matching `curl -f`.
      test: [ "CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/', timeout=5)" ]
      interval: 30s
      timeout: 10s
      retries: 3
  # Polling worker: same image, different entry point.
  poller:
    build: .
    image: gitea-bot:latest
    env_file:
      - .env
    command: [ "python", "-u", "-m", "gitea_bot.poller" ]
    restart: unless-stopped
    depends_on:
      - gitea-bot

0
gitea_bot/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,21 @@
import os
import google.genai as genai
class GeminiClient:
    """Thin wrapper around the google-genai client used to generate review text.

    Configuration is read from the environment:
      - GOOGLE_API_KEY (required): API key passed to ``genai.Client``.
      - GOOGLE_MODEL (optional): model name, defaults to ``gemini-2.5-pro``.
    """

    def __init__(self):
        """Create the underlying client.

        Raises:
            RuntimeError: if GOOGLE_API_KEY is unset or empty.
        """
        self.api_key = os.getenv("GOOGLE_API_KEY")
        if not self.api_key:
            raise RuntimeError("GOOGLE_API_KEY must be set")
        # Google Developer AI model (configurable via env).
        self.model = os.getenv("GOOGLE_MODEL", "gemini-2.5-pro")
        self.client = genai.Client(api_key=self.api_key)

    def generate_review(self, prompt: str) -> str:
        """Send ``prompt`` to the configured model and return the response text.

        Always returns a string: when the response carries no text the result
        is ``""`` rather than ``None``, so the ``-> str`` contract holds for
        callers that go on to parse the output.
        """
        response = self.client.models.generate_content(
            model=self.model,
            contents=prompt,
        )
        return response.text or ""

93
gitea_bot/gitea_client.py Normal file
View File

@@ -0,0 +1,93 @@
import requests
from typing import Iterator, List
class GiteaClient:
    """Minimal helper for the subset of the Gitea REST API the bot needs.

    Every request authenticates with ``Authorization: token <token>`` and uses
    a 30 second timeout. HTTP error statuses surface as ``requests.HTTPError``
    via ``raise_for_status()``.
    """

    # Hard cap on pages fetched for one listing, so a misbehaving server can
    # never keep the client looping forever.
    _MAX_PAGES = 1000

    def __init__(self, api_url: str, token: str):
        """Store the API base URL (trailing slashes removed) and the token."""
        self.api_url = api_url.rstrip("/")
        self.token = token

    def _headers(self):
        """Return the default headers for JSON API calls."""
        return {"Authorization": f"token {self.token}", "Content-Type": "application/json"}

    def _get_all_pages(self, url: str, params: dict = None) -> List[dict]:
        """Fetch every page of a list endpoint and return the combined items.

        Pages are requested with an increasing ``page`` query parameter until
        an empty page comes back. If a page is identical to the previous one
        the endpoint is assumed to ignore ``page`` and the loop stops, so
        non-paginated endpoints still return their items exactly once.

        Raises:
            requests.HTTPError: if any page request returns an error status.
        """
        items: List[dict] = []
        previous = None
        for page in range(1, self._MAX_PAGES + 1):
            query = dict(params or {}, page=page)
            r = requests.get(url, headers=self._headers(), params=query, timeout=30)
            r.raise_for_status()
            batch = r.json()
            if not batch or batch == previous:
                break
            items.extend(batch)
            previous = batch
        return items

    def available_repositories(self) -> Iterator[tuple[str, str]]:
        """Yield ``(owner_login, repo_name)`` for every repository listed by /user/repos."""
        for repo in self._get_all_pages(f"{self.api_url}/user/repos"):
            # "owner" may be present but null; treat that like a missing owner.
            owner = (repo.get("owner") or {}).get("login")
            name = repo.get("name")
            if owner and name:  # Skip repos with missing owner or name
                yield owner, name
            else:
                print(f"Warning: Skipping repo with missing owner or name: {repo}")

    def list_pull_request_files(self, owner: str, repo: str, pr_number: int) -> List[dict]:
        """List the changed files of a pull request.

        Uses /repos/{owner}/{repo}/pulls/{index}/files; adjust here if your
        instance exposes the file list elsewhere. Always returns a list (never
        ``None``) or raises ``requests.HTTPError``.
        """
        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}/files"
        return self._get_all_pages(url)

    def get_pull_request_diff(self, owner: str, repo: str, pr_number: int) -> str:
        """Fetch unified diff text for a pull request."""
        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}.diff"
        # No JSON content type here: the response body is plain diff text.
        headers = {"Authorization": f"token {self.token}"}
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        return r.text

    def create_issue_comment(self, owner: str, repo: str, issue_index: int, body: str) -> dict:
        """Post a plain comment on an issue or pull request and return the API response."""
        url = f"{self.api_url}/repos/{owner}/{repo}/issues/{issue_index}/comments"
        r = requests.post(url, headers=self._headers(), json={"body": body}, timeout=30)
        r.raise_for_status()
        return r.json()

    def list_open_pull_requests(self, owner: str, repo: str) -> List[dict]:
        """List all open pull requests for a repository."""
        url = f"{self.api_url}/repos/{owner}/{repo}/pulls"
        return self._get_all_pages(url, {"state": "open"})

    def list_repos_for_owner(self, owner: str) -> List[dict]:
        """List repos for an owner that may be an organisation or a user.

        The organisation endpoint is tried first; if it answers with an HTTP
        error the user endpoint is used instead, and its errors propagate.
        """
        try:
            return self._get_all_pages(f"{self.api_url}/orgs/{owner}/repos")
        except requests.HTTPError:
            return self._get_all_pages(f"{self.api_url}/users/{owner}/repos")

    def create_pull_request_review(self, owner: str, repo: str, pr_number: int, body: str, comments: List[dict] = None) -> dict:
        """Create a PR review with optional line-specific comments.

        Args:
            owner: Repository owner
            repo: Repository name
            pr_number: PR number/index
            body: General review comment
            comments: List of line comments (or None for a body-only review).
                Each comment dict should have:
                - path: file path
                - new_position: line number in new version
                - body: comment text

        Returns:
            The decoded JSON response of the review endpoint.
        """
        url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pr_number}/reviews"
        payload = {
            "body": body,
            "event": "COMMENT",
        }
        if comments:
            payload["comments"] = comments
        r = requests.post(url, headers=self._headers(), json=payload, timeout=30)
        r.raise_for_status()
        return r.json()

304
gitea_bot/poller.py Normal file
View File

@@ -0,0 +1,304 @@
import os
import time
import json
import re
from pathlib import Path
from typing import List, Optional
import requests
from gitea_client import GiteaClient
from gemini_client import GeminiClient
from dotenv import load_dotenv
# Project root: the directory that contains the gitea_bot package.
ROOT = Path(__file__).resolve().parent.parent

# Pull settings from the project-root .env file when one is present.
env_path = ROOT / ".env"
if env_path.exists():
    load_dotenv(dotenv_path=env_path)

# Configuration (read after the optional .env has been loaded)
API_URL = os.getenv("GITEA_API_URL")
TOKEN = os.getenv("GITEA_TOKEN")
BOT = os.getenv("BOT_USERNAME")
POLL_INTERVAL = int(os.getenv("POLL_INTERVAL", "60"))  # seconds between polling rounds
POLL_OWNER = os.getenv("POLL_OWNER")
POLL_REPOS = os.getenv("POLL_REPOS")  # comma-separated owner/repo

# State file remembering which pull requests were already reviewed.
SEEN_PATH = ROOT / ".poller_seen.json"

# The poller cannot do anything useful without these three settings.
if not API_URL or not TOKEN or not BOT:
    raise RuntimeError("GITEA_API_URL, GITEA_TOKEN and BOT_USERNAME must be set for poller")

gitea = GiteaClient(API_URL, TOKEN)
gemini = GeminiClient()
def load_seen() -> set:
    """Load the set of already-reviewed PR keys from ``SEEN_PATH``.

    The file holds a JSON list of lists; each inner list becomes a tuple so it
    can live in a set. A missing file yields an empty set silently. An
    unreadable or malformed file also yields an empty set, but the problem is
    reported instead of being swallowed without a trace.
    """
    try:
        with open(SEEN_PATH, "r", encoding="utf-8") as f:
            return {tuple(entry) for entry in json.load(f)}
    except FileNotFoundError:
        # First run: nothing has been reviewed yet.
        return set()
    except (OSError, ValueError, TypeError) as e:
        # ValueError covers invalid JSON and decoding errors; TypeError covers
        # entries that are not iterable or not hashable.
        print(f"Warning: could not load {SEEN_PATH}, starting with empty state: {e}")
        return set()
def save_seen(seen: set):
    """Persist the set of reviewed PR keys to ``SEEN_PATH`` as a JSON list of lists.

    The data is serialised before the file is opened: opening in "w" mode
    truncates the file, so serialising first means a serialisation error
    cannot wipe the previously saved state.
    """
    payload = json.dumps([list(entry) for entry in seen])
    with open(SEEN_PATH, "w", encoding="utf-8") as f:
        f.write(payload)
def build_prompt_from_file(file_dict: dict, max_patch_chars: int = 30000) -> str:
    """Build a structured prompt for reviewing a single file diff.

    Args:
        file_dict: Changed-file entry. The file name is taken from
            ``filename`` or ``path`` (falling back to "unknown") and the diff
            text from ``patch`` or ``diff`` (falling back to "").
        max_patch_chars: Maximum number of diff characters embedded in the
            prompt. Longer diffs are cut and marked with "...TRUNCATED...".

    Returns:
        The prompt text: reviewer instructions, the expected JSON schema, the
        file name and the (possibly truncated) unified diff.
    """
    filename = file_dict.get("filename") or file_dict.get("path") or "unknown"
    patch = file_dict.get("patch") or file_dict.get("diff") or ""
    if len(patch) > max_patch_chars:
        patch = patch[:max_patch_chars] + "\n...TRUNCATED..."
    return (
        "You are a senior code reviewer. Analyze exactly one file diff and return ONLY JSON.\n"
        "You review C++ code with the Qt framework\n"
        "Rules:\n"
        "1) Only report real issues or actionable improvements.\n"
        "2) Use diff positions (line index in the unified diff hunk) for comment anchoring.\n"
        "3) Keep each comment short and specific.\n"
        "4) If there are no findings, return an empty findings array.\n\n"
        "JSON schema:\n"
        "{\n"
        " \"summary\": \"short summary\",\n"
        " \"findings\": [\n"
        " {\n"
        " \"diff_position\": 12,\n"
        " \"severity\": \"high|medium|low\",\n"
        " \"comment\": \"text\"\n"
        " }\n"
        " ]\n"
        "}\n\n"
        f"File: {filename}\n"
        "Unified diff:\n"
        f"{patch}"
    )
def extract_json_object(text: str) -> Optional[dict]:
    """Extract a JSON object from model output, including fenced JSON blocks.

    Strategy:
      1. Strip a surrounding Markdown code fence (with optional "json" tag).
      2. Try to parse the whole remaining text. If it is valid JSON the result
         is returned when it is an object, and ``None`` when it is not.
      3. Otherwise scan for the first ``{`` from which a complete JSON object
         can be decoded. This tolerates prose before and after the object,
         even when that prose contains braces of its own.

    Returns:
        The decoded dict, or ``None`` if no JSON object could be found.
    """
    if not text:
        return None
    raw = text.strip()
    if raw.startswith("```"):
        lines = raw.splitlines()
        if len(lines) >= 3 and lines[-1].strip() == "```":
            raw = "\n".join(lines[1:-1]).strip()
            if raw.startswith("json"):
                raw = raw[4:].strip()
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        pass
    else:
        return data if isinstance(data, dict) else None

    decoder = json.JSONDecoder()
    start = raw.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON document at the start of the string
            # and ignores whatever follows it.
            data, _ = decoder.raw_decode(raw[start:])
        except json.JSONDecodeError:
            start = raw.find("{", start + 1)
            continue
        return data if isinstance(data, dict) else None
    return None
def parse_structured_review(ai_response: str) -> dict:
    """Turn raw model output into ``{"summary": str, "findings": [dict, ...]}``.

    The summary defaults to "No summary provided.". Each kept finding has an
    integer ``diff_position``, a lower-cased ``severity`` (default "low") and a
    non-empty ``comment``; entries that are not dicts, lack an int-convertible
    position or have an empty comment are dropped.
    """
    payload = extract_json_object(ai_response) or {}

    def normalize(entry):
        """Return the cleaned finding, or None when the entry must be skipped."""
        if not isinstance(entry, dict):
            return None
        try:
            position = int(entry.get("diff_position"))
        except (TypeError, ValueError):
            return None
        text = str(entry.get("comment") or "").strip()
        if not text:
            return None
        level = str(entry.get("severity") or "low").strip().lower()
        return {
            "diff_position": position,
            "severity": level,
            "comment": text,
        }

    summary_text = str(payload.get("summary") or "No summary provided.").strip()
    candidates = payload.get("findings") or []
    if not isinstance(candidates, list):
        # Anything other than a list of findings is ignored wholesale.
        candidates = []
    cleaned = (normalize(entry) for entry in candidates)
    return {
        "summary": summary_text,
        "findings": [finding for finding in cleaned if finding is not None],
    }
def split_unified_diff_by_file(unified_diff: str) -> dict:
    """Split a PR unified diff into per-file diff chunks keyed by new path.

    A chunk starts at each "diff --git" line. Its key is taken from the
    "+++ b/<path>" header when present; otherwise (deleted files, where the
    header is "+++ /dev/null", or changes without a "+++" header at all) the
    "b/" path of the "diff --git" line is used. Chunks for which no path can
    be determined are dropped.

    Returns:
        Mapping of file path to that file's diff text (whitespace-stripped).
    """
    file_diffs: dict = {}
    current_lines: List[str] = []
    current_path: Optional[str] = None
    # Once the first hunk ("@@ ...") has started, a line beginning with "+++ "
    # is an added line whose content starts with "++ ", not a path header.
    in_hunk = False

    def flush_current() -> None:
        if current_path and current_lines:
            file_diffs[current_path] = "\n".join(current_lines).strip()

    for line in unified_diff.splitlines():
        if line.startswith("diff --git "):
            flush_current()
            current_lines = [line]
            in_hunk = False
            # Provisional path from the header; a later "+++ " line overrides it.
            match = re.match(r"diff --git a/(.+?) b/(.+)", line)
            current_path = match.group(2) if match else None
            continue
        current_lines.append(line)
        if line.startswith("@@"):
            in_hunk = True
        elif not in_hunk and line.startswith("+++ "):
            # Example: +++ b/src/main.cpp
            raw_path = line[4:].strip()
            if raw_path != "/dev/null":
                current_path = raw_path[2:] if raw_path.startswith("b/") else raw_path
    flush_current()
    return file_diffs
def handle_assignment(owner: str, repo: str, pr: dict):
    """Review one pull request file by file and post a single combined review.

    Steps: list the changed files, fall back to the PR's unified diff when no
    listed file carries patch text, ask the model for a structured review of
    each file, then post one review with per-file summaries and line comments.

    Returns:
        True if the review was posted; False if the file list could not be
        fetched, was empty, or posting the review failed.
    """
    pr_number = pr.get("number") or pr.get("index") or pr.get("id")

    def patch_of(entry: dict) -> str:
        """Return the inline diff text of a file entry, or "" if it has none."""
        return entry.get("patch") or entry.get("diff") or ""

    try:
        files = gitea.list_pull_request_files(owner, repo, pr_number)
    except Exception as e:
        print(f"failed to fetch files for {owner}/{repo}#{pr_number}: {e}")
        return False
    if not files:
        print(f"No files found for {owner}/{repo}#{pr_number}")
        return False

    # Some Gitea setups return filenames but no patch in /pulls/{n}/files.
    fallback_patches = {}
    if not any(patch_of(entry).strip() for entry in files):
        try:
            unified_diff = gitea.get_pull_request_diff(owner, repo, pr_number)
            fallback_patches = split_unified_diff_by_file(unified_diff)
            print(
                f"Loaded fallback unified diff for {owner}/{repo}#{pr_number} "
                f"({len(fallback_patches)} file patches)"
            )
        except Exception as e:
            print(f"failed to load fallback diff for {owner}/{repo}#{pr_number}: {e}")

    # Analyze each file individually based on its diff.
    review_comments: List[dict] = []
    file_summaries: List[str] = []
    for entry in files:
        filename = entry.get("filename") or entry.get("path")
        if not filename:
            continue

        patch = patch_of(entry)
        if fallback_patches and not patch.strip():
            patch = fallback_patches.get(filename, "")
        if not patch.strip():
            file_summaries.append(f"**{filename}**: No textual diff available.")
            continue

        print(f"Analyzing {filename} for {owner}/{repo}#{pr_number}")
        # Copy the entry so the resolved patch can be attached without
        # mutating the API response.
        prompt = build_prompt_from_file({**entry, "patch": patch})
        try:
            ai_response = gemini.generate_review(prompt)
            parsed_review = parse_structured_review(ai_response)
            file_summaries.append(f"**{filename}**: {parsed_review['summary']}")
            for finding in parsed_review["findings"]:
                severity = finding["severity"].upper()
                # NOTE(review): the prompt asks the model for a position inside
                # the diff, but the value is sent as "new_position" — confirm
                # which coordinate the review endpoint expects.
                review_comments.append({
                    "path": filename,
                    "new_position": finding["diff_position"],
                    "body": f"[{severity}] {finding['comment']}",
                })
        except Exception as e:
            print(f"failed to generate review for {filename}: {e}")
            file_summaries.append(f"**{filename}**: Error analyzing file - {e}")

    # Create one PR review containing summary + line-anchored comments.
    review_body = "AI Code Review\n\n" + "\n".join(file_summaries)
    try:
        gitea.create_pull_request_review(
            owner, repo, pr_number,
            body=review_body,
            comments=review_comments or None,
        )
    except Exception as e:
        print(f"failed to post review for {owner}/{repo}#{pr_number}: {e}")
        return False
    print(f"Posted review for {owner}/{repo}#{pr_number} with {len(review_comments)} line comments")
    return True
def run():
    """Poll all accessible repositories forever and review newly assigned PRs.

    Each round lists the repositories visible to the token, then their open
    pull requests, and calls ``handle_assignment`` for every PR that lists the
    bot among its requested reviewers and has not been reviewed yet. Reviewed
    PRs are remembered across restarts via ``load_seen`` / ``save_seen``. The
    loop sleeps ``POLL_INTERVAL`` seconds between rounds and stops on Ctrl-C.
    """
    seen = load_seen()
    print("Starting poller; checking repos...")
    try:
        while True:
            try:
                repos = list(gitea.available_repositories())
            except requests.exceptions.RequestException as e:
                # A transient API or network failure must not kill the poller;
                # skip this round and try again after the usual interval.
                print(f"failed to list repositories: {e}")
                time.sleep(POLL_INTERVAL)
                continue
            print(f"Found {len(repos)} accessible repositories")
            for owner, repo in repos:
                try:
                    prs = gitea.list_open_pull_requests(owner, repo)
                except requests.exceptions.HTTPError as e:
                    if e.response is not None and e.response.status_code == 404:
                        # Repo exists but is not accessible (permission or deleted)
                        continue
                    print(f"failed to list PRs for {owner}/{repo}: {e}")
                    continue
                except Exception as e:
                    print(f"failed to list PRs for {owner}/{repo}: {e}")
                    continue
                for pr in prs:
                    key = (f"{owner}/{repo}", pr.get("number"))
                    reviewers = [
                        r.get("login") or r.get("username")
                        for r in (pr.get("requested_reviewers") or [])
                    ]
                    if BOT in reviewers and key not in seen:
                        print(f"Detected assignment: {key}")
                        ok = handle_assignment(owner, repo, pr)
                        if ok:
                            # Only remember PRs whose review was actually
                            # posted, so failed attempts are retried next round.
                            seen.add(key)
                            save_seen(seen)
            time.sleep(POLL_INTERVAL)
    except KeyboardInterrupt:
        print("Poller stopped")


if __name__ == "__main__":
    run()

5
requirements.txt Normal file
View File

@@ -0,0 +1,5 @@
fastapi==0.95.2
uvicorn[standard]==0.22.0
requests==2.31.0
python-dotenv==1.0.1
google-genai