(****************************************************************************** * * context-mirror-bot -- Grab the official Context source code from the * official release and import it into a git repository that is hosted * online. * Copyright (C) 2014–2018 Philipp Gesang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * ****************************************************************************** * * Contact: Philipp Gesang * Repo: https://gitlab.com/phgsng/context-mirror-bot * *****************************************************************************) let t_start = Unix.gettimeofday () let self = Filename.basename Sys.argv.(0) type context_src = { name : string; url : string; page : string; } type download = { branch : string; ts : float; file : string; dst : string; } type source_status = Stagnation | New_zip of download type extract_status = Extract_ignored of string | Extract_failed of string | Path of download type import_status = Import_ignored of string | Import_failed of string | Imported of string type timestamp_status = Changed of float | Unchanged type status = Good | Bad of errmsg and errmsg = string exception Bad_env of string exception Prepare_failed of string (* * let bootstrap_repo = "git://repo.or.cz/context.git" (* Marius’s mirror *) *) (* let bootstrap_repo = "git@bb-git-mirror:phg/context-mirror.git" (* our mirror *) *) let bootstrap_repo = "git@bitbucket.org:phg/context-mirror.git" (* our mirror *) let garden_repo = "http://git.contextgarden.net/context/context.git" (* official, but broken *) (*let mirror_repo = "git@bitbucket.org:phg/context-mirror.git" [> our mirror <]*) let mirror_repos = [ "old" , "hg@bitbucket.org:phg/context-mirror.git" (* our old mirror *) ; "new" , "git@gitlab.com:context-mirror-bot/context-mirror.git" (* our current mirror *) ; "ghub", "git@github.com:contextgarden/context-mirror.git" ] (* github mirror *) let sources = (* No “current” anymore as of February 2020. *) [ { name = "beta"; page = "http://www.pragma-ade.nl/download-1.htm"; url = "http://www.pragma-ade.nl/context/latest/cont-tmf.zip"; } ] (* owner and contact details for the user agent string *) let bot_owner = "Philipp Gesang" let owner_contact = "phg+robots@phi-gamma.net" (*let repo_root = "/home/mirror/mirror"*) let repo_root = "/home/phg/src/context-mirror" let repo_subdir = "context-git" let repo_dir = Filename.concat repo_root repo_subdir let git_dir = Filename.concat repo_dir ".git" let git_config_file = Filename.concat git_dir "config" let ssh_wrapper = Filename.concat repo_root "git-ssh-wrapper.sh" let ssh_id = "context_mirror_bot_id_rsa" let ssh_id_dst = (Filename.concat repo_root ssh_id) let static_files = [ ("context-license.txt", "COPYING"); ("context-readme.rst", "README.rst") ] let gitconfig = let repo_config = Printf.sprintf "\ [core] repositoryformatversion = 0 filemode = true bare = false logallrefupdates = true [push] default = matching [user] email = phg@phi-gamma.net name = Context Git Mirror Bot [bitbucket] user = context-mirror-bot token = [remote \"initial-source\"] url = %s fetch = +refs/heads/*:refs/remotes/initial-source/* [branch \"origin\"] remote = initial-source merge = refs/heads/origin [remote \"garden\"] url = %s fetch = +refs/heads/*:refs/remotes/garden/* [branch \"official\"] remote = garden merge = refs/heads/origin " bootstrap_repo garden_repo and dst_config = String.concat "\n" (List.map (fun (name, url) -> let rem = "mirror-"^name in Printf.sprintf "\ [remote \"%s\"] url = %s fetch = +refs/heads/*:refs/remotes/%s/* " rem url rem) mirror_repos) in repo_config ^ dst_config (* " [branch \"beta\"] remote = %s merge = refs/heads/beta [branch \"current\"] remote = %s merge = refs/heads/current " *) let ssh_wrapper_script = Printf.sprintf "\ #!/bin/sh ## ## This file was created automatically. Changes will be overwritten ## with the next invocation of the mirror bot. ## keyfile=\"%s\" if test ! -f \"${keyfile}\"; then echo \"[Git SSH Wrapper: ERROR] No SSH key file at ${keyfile}.\" exit 1 fi ssh -i \"${keyfile}\" $@ " ssh_id_dst (****************************************************************************** * auxiliary functions *****************************************************************************) let now () = Unix.gettimeofday () let format_time t = let open Unix in let ut = localtime t in Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d" (ut.tm_year + 1900) (ut.tm_mon + 1) ut.tm_mday ut.tm_hour ut.tm_min ut.tm_sec let timestamp () = format_time (Unix.gettimeofday ()) let verbosity = ref 0 let set_verbosity n = verbosity := n let msg ?lev:(lev=0) = Printf.( if !verbosity < lev then ifprintf stdout else let fin oc = output_char oc '\n'; flush oc in let pp oc = kfprintf fin stdout in kfprintf pp stdout "[bot %s] %!" (timestamp ()) ) let err str = Printf.eprintf "[bot %s: error] %s\n%!" (timestamp ()) str let logger = Syslog.openlog self let log_info msg = Syslog.syslog logger `LOG_INFO msg let log_debug msg = Syslog.syslog logger `LOG_DEBUG msg let dir_exists path = try let dh = Unix.opendir path in Unix.closedir dh; true with | Unix.Unix_error _ -> false let file_exists path = try let fd = Unix.openfile path [ Unix.O_RDONLY ] 0 in Unix.close fd; true with | Unix.Unix_error _ -> false (** * The git mirror uses periods to separate dates whereas the Context website * uses dashes: * * Marius: 2014.02.13 11:27 * Pragma: 2014-02-14 17:07 * * We’re going to use dashes to for some extra consistency. Besides, that * period-separated date format is weird to begin with. *) let re_date_find = Str.regexp "[0-9][0-9][0-9][0-9][.-][0-1][0-9][.-][0-3][0-9] [0-2][0-9]:[0-6][0-9]" let re_date_match = Str.regexp "\\([0-9][0-9][0-9][0-9]\\)[.-]\\([0-1][0-9]\\)[.-]\\([0-3][0-9]\\) \\([0-2][0-9]\\):\\([0-6][0-9]\\)" let is_timestamp str = try ignore (Str.search_forward re_date_find str 0); true with | Not_found -> false let tm_of_timestamp ?pos:(pos=0) ts = (* this is dirty *) assert (Str.string_match re_date_match ts pos); let open Unix in let (t, _) = mktime { tm_sec = 0; tm_min = int_of_string (Str.matched_group 5 ts); tm_hour = int_of_string (Str.matched_group 4 ts); tm_mday = int_of_string (Str.matched_group 3 ts); tm_mon = int_of_string (Str.matched_group 2 ts) - 1; tm_year = int_of_string (Str.matched_group 1 ts) - 1900; tm_wday = -1; tm_yday = -1; tm_isdst = true; } in t let list_of_pipe cmd = let open Unix in (* We’re only gonna read from the command’s stdout but * keep the stderr around so as to prevent error messages * from flooding the terminal. *) msg ~lev:1 "exec “%s”" cmd; let (so, si, se) = open_process_full cmd (environment ()) in let rec aux acc = try aux ((input_line so) :: acc) with | End_of_file -> let stat = close_process_full (so, si, se) in (stat, List.rev acc) in match aux [] with | (WEXITED code, l) when code = 0 -> Some l | (WEXITED _, _) | (WSIGNALED _, _) | (WSTOPPED _, _) -> None let exec_commands cmds = let rec aux = function | [] -> Good | cmd :: cmds -> begin match list_of_pipe cmd with | Some _ -> aux cmds | None -> Bad (Printf.sprintf "Failed to execute command “%s”." cmd) end in aux cmds let unzip fname dst = list_of_pipe (Printf.sprintf "unzip -o -d %s %s" dst fname) let source_known name srcs = try ignore (List.find (fun src -> src.name = name) srcs); true with | Not_found -> false let forall_sources f = (* cause side effects for sources *) List.iter f sources (****************************************************************************** * git integration *****************************************************************************) module Git : sig val init : unit -> bool val set_path : string -> unit val is_repo : unit -> bool val last_commit : string -> float val import : extract_status -> import_status val push : string -> status val install_files : unit -> unit val fetch : string -> unit val pull : ?rbr:string -> string -> unit exception Git_checkout_failed of string end = struct exception Git_init_failed exception Git_add_failed exception Git_log_failed exception Git_checkout_failed of string exception Git_branch_failed of string exception Git_commit_failed of string exception Git_fetch_failed of string exception Git_pull_failed of string exception Import_files_failed exception Copy_ssh_key_failed let write_config () = msg ~lev:1 "Write git configuration to [%s]" git_config_file; let oc = open_out_gen [Open_wronly; Open_creat; Open_trunc] 0o600 git_config_file in output_string oc gitconfig; close_out oc let install_ssh_wrapper () = match list_of_pipe (Printf.sprintf "cp -f %s %s" ssh_id ssh_id_dst) with | None -> raise Copy_ssh_key_failed | Some _ -> begin let oc = open_out_gen [Open_wronly; Open_creat; Open_trunc] 0o700 ssh_wrapper in output_string oc ssh_wrapper_script; close_out oc end let install_files () = write_config (); install_ssh_wrapper () let import_static_files () = let rec aux = function | [] -> true | (src, dst) :: tl -> begin let fullsrc = Filename.concat repo_root src in let fulldst = Filename.concat repo_dir dst in match list_of_pipe (Printf.sprintf "cp -f %s %s" fullsrc fulldst) with | None -> false | Some _ -> aux tl end in aux static_files let init () = msg "Initializing new repository with state from [%s]" bootstrap_repo; match exec_commands [ "git init"; "git remote add initial-source " ^ bootstrap_repo; "git pull initial-source origin"; "git checkout origin"; ] with | Bad _ -> raise Git_init_failed | Good -> install_files (); true let fetch remote = msg "Fetching remote [%s]" remote; match list_of_pipe ("git fetch " ^ remote) with | None -> raise (Git_fetch_failed remote) | Some _ -> () let pull ?rbr:(rbr="mirror") br = msg ~lev:1 "tentatively checkout new branch [%s] from remote [%s]" br rbr; let () = match list_of_pipe (* this fails if the branch already exists *) (Printf.sprintf "git checkout -b \"%s\" \"%s\"" br rbr) with | Some _ -> msg "new branch [%s] created" br | None -> (* branch exists, do regular checkout *) begin msg ~lev:1 "branch [%s] already exists; performing normal checkout" br; match list_of_pipe (Printf.sprintf "git checkout \"%s\"" br) with | None -> raise (Git_checkout_failed br) | Some _ -> () end in match list_of_pipe ("git pull --ff") with | Some _ -> msg ~lev:1 "pulled branch %s just fine" br | None -> raise (Git_pull_failed (Printf.sprintf "%s (remote: %s)" br rbr)) let last_commit br = match list_of_pipe ("git checkout " ^ br) with | None -> raise (Git_checkout_failed br) | Some _ -> begin match list_of_pipe "git log --max-count=1 --format=%B" with | None | Some [] -> raise Git_log_failed | Some (hd :: _) -> let pos = try Str.search_forward re_date_find hd 0 with Not_found -> err (Printf.sprintf "Timestamp %s of branch %s is not well formed." hd br); raise Git_log_failed in tm_of_timestamp ~pos:pos hd end let set_path path = msg ~lev:1 "GIT_WORK_TREE=%s" path; msg ~lev:1 "GIT_DIR=%s" git_dir; msg ~lev:1 "GIT_SSH=%s" ssh_wrapper; Unix.putenv "GIT_WORK_TREE" path; Unix.putenv "GIT_DIR" git_dir; Unix.putenv "GIT_SSH" ssh_wrapper let is_repo () = match list_of_pipe "git status" with | Some _ -> true | None -> false let gc () = msg ~lev:1 "Running git garbage collector."; ignore (list_of_pipe "git gc") let import status = match status with | Extract_ignored msg -> Import_ignored msg | Extract_failed branch -> Import_ignored (Printf.sprintf "Extracting branch %s failed." branch) | Path { branch = branch; ts = ts; file = _; dst = dst } -> begin ignore (list_of_pipe "git checkout origin"); let needs_branch = match list_of_pipe ("git checkout " ^ branch) with | None -> true | Some _ -> false in msg "Importing [%s] into [%s] branch [%s]." dst (if needs_branch then "new" else "old") branch; match exec_commands [ (* remove current contents and replace it with the extracted files *) (Printf.sprintf "rm -rf -- %s/*" repo_dir); (Printf.sprintf "mv %s/* %s/" dst repo_dir); ] with | Bad errmsg -> Import_failed errmsg | Good -> if import_static_files () = false then raise Import_files_failed else (* -> git add; git commit *) begin match exec_commands [ (Printf.sprintf "git add -A %s/" repo_dir); "git add --update"; ] with | Bad errmsg -> Import_failed errmsg | Good -> begin if needs_branch then begin match list_of_pipe (Printf.sprintf "git checkout -b %s" branch) with | Some _ -> () | None -> raise (Git_branch_failed branch) end; let cmd = Printf.sprintf "git commit --no-gpg-sign --author=\"Hans Hagen \" --message=\"%s\"" (format_time ts) in let () = log_info cmd in match list_of_pipe cmd with | Some _ -> gc (); Imported branch | None -> raise (Git_commit_failed branch) end end end let push br = match list_of_pipe ("git checkout " ^ br) with | None -> raise (Git_checkout_failed br) | Some _ -> begin let rec aux = function | [] -> Good | (rem, _) :: rest -> let cmd = Printf.sprintf "git push \"mirror-%s\" \"%s:%s\"" rem br br in match list_of_pipe cmd with | None -> Bad (Printf.sprintf "git-push: [%s] failed" cmd) | Some _ -> aux rest in aux mirror_repos end end (****************************************************************************** * preparation *****************************************************************************) exception Bad_file of string let ensure_dir d = let open Unix in try let stat = stat d in match stat.st_kind with | S_DIR -> () | _ -> raise (Bad_file "“%d” is not a directory.") with | Unix_error (err, _, _) when err = ENOENT -> msg "Creating directory [%s]" d; FileUtil.mkdir ~parent:true d let ensure_dst_repo d = if not (Git.is_repo ()) then if Git.init () then `New else `Fail else begin msg "Found existing repository at [%s]" d; `Exists end let copy_static_files () = List.iter (fun (src, _) -> let dst = Filename.concat repo_root src in if file_exists dst then msg ~lev:1 "File [%s] exists at [%s], skipping." src dst else begin msg ~lev:1 "Install [%s] exists to [%s]" src dst; match list_of_pipe (Printf.sprintf "cp -f %s %s" src dst) with | None -> raise (Prepare_failed (Printf.sprintf "Error preparing file %s." src)) | Some _ -> () end) static_files let prepare_dst () = try ensure_dir repo_dir; copy_static_files (); ensure_dst_repo repo_dir with | _ -> `Fail (****************************************************************************** * source download *****************************************************************************) let user_agent = Printf.sprintf "Mirror Bot operated by %s <%s>" bot_owner owner_contact let grab url writer = let open Curl in let c = init () in set_url c url; setopt c (CURLOPT_USERAGENT user_agent); set_followlocation c true; set_writefunction c writer; perform c; cleanup c let grab_string url = let buf = Buffer.create 16 in grab url (fun str -> Buffer.add_string buf str; String.length str); Buffer.contents buf let grab_file url file = let chan = open_out file in grab url (fun str -> output_string chan str; String.length str); close_out chan let mk_tmp_webpage name = Filename.concat repo_root (Printf.sprintf "context_download_%s.tmp.html" name) let extract_timestamp src = let tmp = mk_tmp_webpage src.name in grab_file src.page tmp; let input = open_in tmp in let xi = Xmlm.make_input (`Channel input) in let is_candidate name attrs = if name <> "div" then false else begin try let _ = List.find (fun ((_, name), str) -> name = "class" && str = "more-room") attrs in true with | Not_found -> false end in let rec pull xi depth = (*Printf.printf ">> depth %d\n" depth;*) match Xmlm.input xi with | `El_start ((_, name), attrs) when is_candidate name attrs -> begin match Xmlm.peek xi with | `Data str when is_timestamp str -> Some str | _ -> pull xi (depth + 1) end | `El_start _ -> pull xi (depth + 1) | `El_end -> if depth = 1 then None else pull xi (depth - 1) | `Data _ -> pull xi depth | `Dtd _ -> pull xi depth in let res = pull xi 0 in close_in input; (*if not (Xmlm.eoi xi) then*) (*invalid_arg (Printf.sprintf "document %s not well-formed" src.url)*) match res with | None -> invalid_arg (Printf.sprintf "%s does not contain a date string" src.url) | Some dstr -> tm_of_timestamp dstr let date_changed ts src = let repo_ts = extract_timestamp src in msg "ts: %s; repo_ts: %s -> %s" (format_time ts) (format_time repo_ts) (if ts <> repo_ts then "changed" else "unchanged"); if ts <> repo_ts then Changed repo_ts else Unchanged let download_zipball src = let _ = Uri.of_string src.url in let dst = Filename.concat repo_root (src.name ^ ".zip") in if file_exists dst then begin msg "Destination file [%s] exists, removing." dst; Unix.unlink dst end; msg "Downloading source archive [%s]" src.url; let t0 = now () in grab_file src.url dst; log_info (Printf.sprintf "Download of [%s] completed in %.2f s." src.url (abs_float (now () -. t0))); dst let epoch = 0. let retrieve_sources sources = let aux src = let branch = src.name in let local_ts = try Git.last_commit branch with Git.Git_checkout_failed _ -> epoch in match date_changed local_ts src with | Changed upstream_ts -> New_zip { branch = branch; ts = upstream_ts; file = download_zipball src; dst = Filename.concat repo_root src.name } | Unchanged -> Stagnation in List.map aux sources let handle_zipball = function | Stagnation -> Extract_ignored "Nothing todo." | New_zip download -> begin msg "Extracting [%s] to [%s]" download.file download.dst; match unzip download.file download.dst with | Some _ -> Path download | None -> Extract_failed download.branch end (****************************************************************************** * job dispatch *****************************************************************************) let usage = Printf.sprintf "Usage: %s [ --check | --status | --update ] " self let run_status () = if not (dir_exists repo_dir) then msg "No directory at [%s]" repo_dir else begin match list_of_pipe "git branch -a" with | None -> msg "No git repository at [%s]" repo_dir | Some all_branches -> let known = List.fold_left (fun acc line -> let l = String.length line in if l < 3 then acc else begin let br = String.sub line 2 (l - 2) in if source_known br sources then br :: acc else acc end) [] all_branches in List.iter (fun br -> let tm = Git.last_commit br in msg "Found branch %7s with timestamp %s." br (format_time tm)) known end let run_check () = forall_sources (fun src -> let local_ts = Git.last_commit src.name in let upstream_ts = extract_timestamp src in msg "%s:" src.name; msg " local: %s" (format_time local_ts); msg " upstream: %s%s" (format_time upstream_ts) (if local_ts <> upstream_ts then " (new)" else "")) let run_prepare () = ensure_dir repo_dir; copy_static_files (); match ensure_dst_repo repo_dir with | `New -> msg "Created new local repository at [%s]." repo_dir | `Exists -> msg "Found local repository at %s,%s" repo_dir (List.fold_left (fun acc src -> (acc ^ Printf.sprintf " %s: %s" src.name (format_time (Git.last_commit src.name)))) "" sources) | `Fail -> raise (Bad_env "Failed to initialize destination repository.") let run_download () = let zipballs = retrieve_sources sources in let extracted = List.map handle_zipball zipballs in ignore (List.map Git.import extracted) let run_update () = let zipballs = match prepare_dst () with | `New | `Exists -> retrieve_sources sources | `Fail -> raise (Bad_env "Failed to initialize destination repository.") in let extracted = List.map handle_zipball zipballs in let imported = List.map Git.import extracted in let _ = List.iter (fun st -> match st with | Import_ignored rationale -> msg "%s" rationale | Import_failed errmsg -> err errmsg | Imported br -> msg "Pushing [%s] to mirrors ..." br; match Git.push br with | Good -> msg "Success!" | Bad errmsg -> err (Printf.sprintf "Failed to push %s: “%s”" br errmsg)) imported in () let run_sync () = List.iter (fun (rem, _) -> "mirror-"^rem |> Git.fetch) mirror_repos; Git.fetch "garden"; Git.pull "beta"; Git.pull "current"; let name = "garden-master" in Git.pull ~rbr:name name; msg "Pushing changes from context garden repo to mirror"; match Git.push name with | Good -> msg "Success!" | Bad errmsg -> err (Printf.sprintf "Failed to push %s: “%s”" name errmsg) let run_cleanup () = forall_sources (fun src -> let name = src.name in msg "Removing [%s] ->" name; List.iter (fun p -> msg " [%s]" p; ignore (list_of_pipe ("rm -rf -- " ^ p))) [ mk_tmp_webpage name; Filename.concat repo_root name; (Filename.concat repo_root name) ^ ".zip" ]) let run_push () = forall_sources (fun src -> let name = src.name in msg "Pushing branch [%s] to mirrors ..." name; match Git.push name with | Good -> msg "Success!" | Bad errmsg -> err (Printf.sprintf "Failed to push [%s]: “%s”" name errmsg)) let run_files () = Git.install_files () let dispatch_job = function | None -> print_endline usage | Some (descr, act) -> begin log_info (Printf.sprintf "started in %s mode at %s." descr (format_time t_start)); act () end (****************************************************************************** * arg handling *****************************************************************************) let parse_argv () = let job = ref None in Arg.parse [ ("--check", Arg.Unit (fun () -> job := Some ("status" , run_status ); ()), "check source timestamps"); ("--status", Arg.Unit (fun () -> job := Some ("check" , run_check ); ()), "print status of local repo"); ("--update", Arg.Unit (fun () -> job := Some ("update" , run_update ); ()), "update from sources"); ("--sync", Arg.Unit (fun () -> job := Some ("sync" , run_sync ); ()), "sync local repo"); ("--prepare", Arg.Unit (fun () -> job := Some ("prepare" , run_prepare ); ()), "prepare local repository"); ("--download", Arg.Unit (fun () -> job := Some ("download", run_download); ()), "download source packages"); ("--clean", Arg.Unit (fun () -> job := Some ("cleanup" , run_cleanup ); ()), "remove leftover files"); ("--push", Arg.Unit (fun () -> job := Some ("push" , run_push ); ()), "push to mirror"); ("--files", Arg.Unit (fun () -> job := Some ("files" , run_files ); ()), "install config and wrapper files"); ("--verbose", Arg.Unit (fun () -> set_verbosity 1; ()), "enable verbose output"); ] (fun a -> print_endline ("unexpected argument: "^a); exit 0) usage; !job (****************************************************************************** * main *****************************************************************************) let () = let job = parse_argv () in Git.set_path repo_dir; dispatch_job job; log_info (Printf.sprintf "mirror bot finished in %.2f s." (now () -. t_start)); ;;