2024-07-29 17:21:14 -07:00
|
|
|
import subprocess
|
|
|
|
|
import tarfile
|
|
|
|
|
import os
|
2024-07-30 18:00:31 -07:00
|
|
|
import time
|
2025-10-20 16:43:23 -07:00
|
|
|
|
|
|
|
|
import click
|
2024-08-21 17:49:46 -07:00
|
|
|
import requests
|
2024-07-29 17:21:14 -07:00
|
|
|
|
2024-07-30 18:00:31 -07:00
|
|
|
# Age threshold for a previous doc build: if the last build is older than
# this many days, the global cache is (re)loaded.
LAST_BUILD_CUTOFF = 3 # how many days ago to consider a build outdated

# File (relative to the ray repo root) where locally changed/added file
# names are recorded, to be read when updating the cache environment.
PENDING_FILES_PATH = "pending_files.txt"

# Sphinx build-environment pickle, relative to the ray/doc directory; its
# mtime is treated as the timestamp of the last doc build.
ENVIRONMENT_PICKLE = "_build/doctrees/environment.pickle"

# Base URL of the doc build cache archive server.
DOC_BUILD_CACHE_URL = "https://rayci.anyscale.dev/ray/doc/build-cache"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_cache_url(commit: str):
    """Return the URL of the gzipped doc build cache archive for *commit*."""
    return DOC_BUILD_CACHE_URL + "/" + commit + ".tgz"
|
2024-07-29 17:21:14 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_latest_master_commit():
    """Find latest commit that was pushed to origin/master that is also on local env.

    Walks the 100 most recent commits reachable from the current HEAD
    (newest first) and returns the first one for which a build cache
    archive exists on the cache server.

    Returns:
        The commit hash (str) of the newest commit that has a cache archive.

    Raises:
        Exception: If none of the inspected commits has a cache archive.
    """
    # NOTE(review): despite the summary above, this logs from HEAD, not
    # origin/master — it relies on recent master commits being ancestors
    # of the local branch. Confirm against how the build image checks out.
    latest_commits = (
        subprocess.check_output(
            [
                "git",
                "log",
                "-n",
                "100",
                "--format=%H",
            ]
        )
        .strip()
        .decode("utf-8")
        .split("\n")
    )
    for commit in latest_commits:
        # HEAD request only checks existence. A timeout is required:
        # requests has no default timeout, so an unreachable cache server
        # would otherwise hang the build forever.
        with requests.head(
            _build_cache_url(commit), allow_redirects=True, timeout=30
        ) as response:
            if response.status_code == 200:
                return commit
    raise Exception(
        "No cache found for latest master commit. "
        "Please merge with upstream master or use 'make develop'."
    )
|
2024-07-29 17:21:14 -07:00
|
|
|
|
|
|
|
|
|
2025-10-20 16:43:23 -07:00
|
|
|
def fetch_cache(commit, target_file_path):
    """
    Fetch doc cache archive from rayci.anyscale.dev

    The response is streamed to disk in 8 KiB chunks so the (potentially
    large) archive is never held fully in memory.

    Args:
        commit: The commit hash of the doc cache to fetch
        target_file_path: The file path to save the doc cache archive

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # A timeout is required: requests has no default timeout, so a dead
    # or unreachable cache server would otherwise hang the build forever.
    with requests.get(
        _build_cache_url(commit), allow_redirects=True, stream=True, timeout=60
    ) as response:
        response.raise_for_status()
        with open(target_file_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f"Successfully downloaded {target_file_path}")
|
2024-07-29 17:21:14 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_cache(cache_path: str, doc_dir: str):
    """
    Unpack a gzipped doc cache archive over the ray/doc directory.

    Args:
        cache_path: The file path of the doc cache archive.
        doc_dir: Directory the archive contents are extracted into.
    """
    archive = tarfile.open(cache_path, "r:gz")
    try:
        archive.extractall(doc_dir)
    finally:
        archive.close()
    print(f"Extracted {cache_path} to {doc_dir}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_changed_and_added_files(ray_dir: str, latest_master_commit: str):
    """
    List all changed and added untracked files in the repo.

    Combines untracked files (``git ls-files --others``), working-tree
    modifications (``git ls-files --modified``), and files that differ
    from the given master commit (``git diff --name-only``), then strips
    the leading ``doc/`` and ``source/`` path components so entries are
    relative to the Sphinx source tree.

    This is to prevent cache environment from updating timestamp of these files.

    Args:
        ray_dir: Path of the Ray repository checkout to run git in.
        latest_master_commit: Commit hash to diff the working tree against.

    Returns:
        List of file paths (relative to doc/source where applicable).
    """

    def _git_lines(args):
        # git terminates lines with "\n" on every platform, so use
        # splitlines() rather than splitting on os.linesep; this also
        # drops the empty entry a trailing newline would otherwise add.
        return (
            subprocess.check_output(args, cwd=ray_dir)
            .decode("utf-8")
            .splitlines()
        )

    untracked_files = _git_lines(["git", "ls-files", "--others"])
    modified_files = _git_lines(["git", "ls-files", "--modified"])
    diff_files_with_master = _git_lines(
        ["git", "diff", "--name-only", latest_master_commit]
    )

    filenames = []
    for filename in untracked_files + modified_files + diff_files_with_master:
        # Strip only the LEADING components; str.replace would also mangle
        # "doc/" or "source/" appearing later in the path.
        if filename.startswith("doc/"):  # Remove "doc/" prefix
            filename = filename[len("doc/"):]
        if filename.startswith("source/"):  # Remove "source/" prefix
            filename = filename[len("source/"):]
        filenames.append(filename)

    return filenames
|
|
|
|
|
|
|
|
|
|
|
2024-07-30 18:00:31 -07:00
|
|
|
def should_load_cache(ray_dir: str):
    """
    Check if cache should be loaded based on the timestamp of last build.

    Returns True when no previous build environment exists, or when the
    previous build is older than LAST_BUILD_CUTOFF days.
    """
    ray_doc_dir = os.path.join(ray_dir, "doc")
    pickle_path = f"{ray_doc_dir}/{ENVIRONMENT_PICKLE}"

    if not os.path.exists(pickle_path):
        print("Doc build environment pickle file does not exist.")
        return True

    # The environment pickle's mtime stands in for "when the docs were
    # last built".
    last_build_time = os.path.getmtime(pickle_path)
    elapsed = time.time() - last_build_time
    print("time diff: ", elapsed)

    # Load cache if last build was more than LAST_BUILD_CUTOFF days ago
    if elapsed > LAST_BUILD_CUTOFF * 60 * 60 * 24:
        print(f"Last build was more than {LAST_BUILD_CUTOFF} days ago.")
        return True

    return False
|
|
|
|
|
|
|
|
|
|
|
2024-07-29 17:21:14 -07:00
|
|
|
@click.command()
@click.option("--ray-dir", default="/ray", help="Path to Ray repo")
def main(ray_dir: str) -> None:
    # Load the global doc build cache into the local ray/doc tree, unless
    # the previous local build is still fresh.
    if not should_load_cache(ray_dir):
        print("Skip loading global cache...")
        return

    print("Loading global cache ...")
    latest_master_commit = find_latest_master_commit()

    # Record every locally changed/added file so the cache-environment
    # update step can leave their timestamps untouched.
    filenames = list_changed_and_added_files(ray_dir, latest_master_commit)
    with open(f"{ray_dir}/{PENDING_FILES_PATH}", "w") as f:  # Save to file to be used when updating cache environment
        f.write("\n".join(filenames))

    # Download the cache archive for that commit, unpack it over ray/doc,
    # then delete the archive.
    cache_path = f"{ray_dir}/doc.tgz"
    print(f"Use build cache for commit {latest_master_commit}")
    fetch_cache(latest_master_commit, cache_path)
    extract_cache(cache_path, f"{ray_dir}/doc")
    os.remove(cache_path)
|
2024-07-29 17:21:14 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the click command when executed directly.
    main()
|