(****************************************************************************** * * context-mirror-bot -- Grab the official Context source code from the * official release and import it into a git repository that is hosted * online. * Copyright (C) 2014 Philipp Gesang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * ****************************************************************************** * * Contact: Philipp Gesang * Bitbucket: https://bitbucket.org/phg/context-mirror * *****************************************************************************) let t_start = Unix.gettimeofday () type context_src = { name : string; url : string; page : string; } type download = { branch : string; ts : float; file : string; dst : string; } type source_status = Stagnation | New_zip of download type extract_status = Extract_ignored | Extract_failed of string | Path of download type import_status = Import_ignored | Import_failed of string | Imported of string type timestamp_status = Changed of float | Unchanged type status = Good | Bad of errmsg and errmsg = string exception Bad_env of string exception Prepare_failed of string let bootstrap_repo = "git://repo.or.cz/context.git" (* Marius’s mirror *) let garden_repo = "http://git.contextgarden.net/context/context.git" (* official, but broken *) let mirror_repo = "git@bitbucket.org:phg/context-mirror.git" (* our mirror *) let sources = [ { name = "beta"; page = "http://www.pragma-ade.com/download-2.htm"; url = "http://www.pragma-ade.com/context/beta/cont-tmf.zip"; }; { name = "current"; page = "http://www.pragma-ade.com/download-1.htm"; url = "http://www.pragma-ade.com/context/current/cont-tmf.zip"; } ] (* owner and contact details for the user agent string *) let bot_owner = "Philipp Gesang" let owner_contact = "phg42.2a@gmail.com" let repo_root = "/home/phg/src/mirror" let repo_subdir = "context-git" let repo_dir = Filename.concat repo_root repo_subdir let git_dir = Filename.concat repo_dir ".git" let git_config_file = Filename.concat git_dir "config" let ssh_wrapper = Filename.concat repo_root "git-ssh-wrapper.sh" let ssh_id = "context_mirror_bot_id_rsa" let ssh_id_dst = (Filename.concat repo_root ssh_id) let static_files = [ ("context-license.txt", "COPYING"); ("context-readme.rst", "README.rst") ] let gitconfig = Printf.sprintf "\ [core] repositoryformatversion = 0 filemode = true bare = false logallrefupdates = true [push] default = matching [user] email = phg42.2a@gmail.com name = Context Git Mirror Bot [bitbucket] user = context-mirror-bot token = [remote \"initial-source\"] url = %s fetch = +refs/heads/*:refs/remotes/initial-source/* [branch \"origin\"] remote = initial-source merge = refs/heads/origin [remote \"garden\"] url = %s fetch = +refs/heads/*:refs/remotes/garden/* [branch \"official\"] remote = garden merge = refs/heads/origin [remote \"mirror\"] url = %s fetch = +refs/heads/*:refs/remotes/mirror/* [branch \"beta\"] remote = mirror merge = refs/heads/beta [branch \"current\"] remote = mirror merge = refs/heads/current " bootstrap_repo garden_repo mirror_repo let ssh_wrapper_script = Printf.sprintf "\ #!/bin/sh ## ## This file was crated automatically. Changes will be overwritten ## with the next invocation of the mirror bot. ## keyfile=\"%s\" if test ! -f \"${keyfile}\"; then echo \"[Git SSH Wrapper: ERROR] No SSH key file at ${keyfile}.\" exit 1 fi ssh -i \"${keyfile}\" $@ " ssh_id_dst (****************************************************************************** * auxiliary functions *****************************************************************************) let format_time t = let open Unix in let ut = localtime t in Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d" (ut.tm_year + 1900) (ut.tm_mon + 1) ut.tm_mday ut.tm_hour ut.tm_min ut.tm_sec let timestamp () = format_time (Unix.gettimeofday ()) let verbosity = ref 0 let set_verbosity n = verbosity := n let msg ?lev:(lev=0) str = if !verbosity >= lev then Printf.printf "[bot %s] %s\n%!" (timestamp ()) str let err str = Printf.eprintf "[bot %s: error] %s\n%!" (timestamp ()) str let logger = Syslog.openlog Sys.argv.(0);; let log_info msg = Syslog.syslog logger `LOG_INFO msg let log_debug msg = Syslog.syslog logger `LOG_DEBUG msg let dir_exists path = try let dh = Unix.opendir path in Unix.closedir dh; true with | Unix.Unix_error _ -> false let file_exists path = try let fd = Unix.openfile path [ Unix.O_RDONLY ] 0 in Unix.close fd; true with | Unix.Unix_error _ -> false (** * The git mirror uses periods to separate dates whereas the Context website * uses dashes: * * Marius: 2014.02.13 11:27 * Pragma: 2014-02-14 17:07 * * We’re going to use dashes to for some extra consistency. Besides, that * period-separated date format is weird to begin with. *) let re_date_find = Str.regexp "[0-9][0-9][0-9][0-9][.-][0-1][0-9][.-][0-3][0-9] [0-2][0-9]:[0-6][0-9]" let re_date_match = Str.regexp "\\([0-9][0-9][0-9][0-9]\\)[.-]\\([0-1][0-9]\\)[.-]\\([0-3][0-9]\\) \\([0-2][0-9]\\):\\([0-6][0-9]\\)" let is_timestamp str = try ignore (Str.search_forward re_date_find str 0); true with | Not_found -> false let tm_of_timestamp ?pos:(pos=0) ts = (* this is dirty *) assert (Str.string_match re_date_match ts pos); let open Unix in let (t, _) = mktime { tm_sec = 0; tm_min = int_of_string (Str.matched_group 5 ts); tm_hour = int_of_string (Str.matched_group 4 ts); tm_mday = int_of_string (Str.matched_group 3 ts); tm_mon = int_of_string (Str.matched_group 2 ts) - 1; tm_year = int_of_string (Str.matched_group 1 ts) - 1900; tm_wday = -1; tm_yday = -1; tm_isdst = true; } in t let list_of_pipe cmd = let open Unix in (* We’re only gonna read from the command’s stdout but * keep the stderr around so as to prevent error messages * from flooding the terminal. *) msg ~lev:1 (Printf.sprintf "exec “%s”" cmd); let (so, si, se) = open_process_full cmd (environment ()) in let rec aux acc = try aux ((input_line so) :: acc) with | End_of_file -> let stat = close_process_full (so, si, se) in (stat, List.rev acc) in match aux [] with | (WEXITED code, l) when code = 0 -> Some l | (WEXITED _, _) | (WSIGNALED _, _) | (WSTOPPED _, _) -> None let exec_commands cmds = let rec aux = function | [] -> Good | cmd :: cmds -> begin match list_of_pipe cmd with | Some _ -> aux cmds | None -> Bad (Printf.sprintf "Failed to execute command “%s”." cmd) end in aux cmds let unzip fname dst = list_of_pipe (Printf.sprintf "unzip -o -d %s %s" dst fname) let source_known name srcs = try ignore (List.find (fun src -> src.name = name) srcs); true with | Not_found -> false let forall_sources f = (* cause side effects for sources *) List.iter f sources (****************************************************************************** * git integration *****************************************************************************) module Git : sig val init : unit -> bool val set_path : string -> unit val is_repo : unit -> bool val last_commit : string -> float val import : extract_status -> import_status val push : string -> status val install_files : unit -> unit exception Git_checkout_failed of string end = struct exception Git_init_failed exception Git_add_failed exception Git_log_failed exception Git_checkout_failed of string exception Git_branch_failed of string exception Git_commit_failed of string exception Import_files_failed exception Copy_ssh_key_failed let write_config () = msg ~lev:1 ("Write git configuration to " ^ git_config_file); let oc = open_out_gen [Open_wronly; Open_creat] 0o600 git_config_file in output_string oc gitconfig; close_out oc let install_ssh_wrapper () = match list_of_pipe (Printf.sprintf "cp -f %s %s" ssh_id ssh_id_dst) with | None -> raise Copy_ssh_key_failed | Some _ -> begin let oc = open_out_gen [Open_wronly; Open_creat] 0o700 ssh_wrapper in output_string oc ssh_wrapper_script; close_out oc end let install_files () = write_config (); install_ssh_wrapper () let import_static_files () = let rec aux = function | [] -> true | (src, dst) :: tl -> begin let fullsrc = Filename.concat repo_root src in let fulldst = Filename.concat repo_dir dst in match list_of_pipe (Printf.sprintf "cp -f %s %s" fullsrc fulldst) with | None -> false | Some _ -> aux tl end in aux static_files let init () = msg ("Initializing new repository with state from " ^ bootstrap_repo); match exec_commands [ "git init"; "git remote add initial-source " ^ bootstrap_repo; "git pull initial-source origin"; "git checkout origin"; ] with | Bad _ -> raise Git_init_failed | Good -> install_files (); true let last_commit br = match list_of_pipe ("git checkout " ^ br) with | None -> raise (Git_checkout_failed br) | Some _ -> begin match list_of_pipe "git log --max-count=1 --format=%B" with | None | Some [] -> raise Git_log_failed | Some (hd :: _) -> let pos = try Str.search_forward re_date_find hd 0 with Not_found -> err (Printf.sprintf "Timestamp %s of branch %s is not well formed." hd br); raise Git_log_failed in tm_of_timestamp ~pos:pos hd end let set_path path = msg ~lev:1 (Printf.sprintf "GIT_WORK_TREE=%s" path); msg ~lev:1 (Printf.sprintf "GIT_DIR=%s" git_dir); msg ~lev:1 (Printf.sprintf "GIT_SSH=%s" ssh_wrapper); Unix.putenv "GIT_WORK_TREE" path; Unix.putenv "GIT_DIR" git_dir; Unix.putenv "GIT_SSH" ssh_wrapper let is_repo () = match list_of_pipe "git status" with | Some _ -> true | None -> false let gc () = msg ~lev:1 "Running git garbage collector."; ignore (list_of_pipe "git gc") let import status = match status with | Extract_ignored | Extract_failed _ -> Import_ignored | Path { branch = branch; ts = ts; file = _; dst = dst } -> begin ignore (list_of_pipe "git checkout origin"); let needs_branch = match list_of_pipe ("git checkout " ^ branch) with | None -> true | Some _ -> false in msg (Printf.sprintf "Importing %s into %s branch %s." dst (if needs_branch then "new" else "old") branch); match exec_commands [ (* remove current contents and replace it with the extracted files *) (Printf.sprintf "rm -rf -- %s/*" repo_dir); (Printf.sprintf "mv %s/* %s/" dst repo_dir); ] with | Bad errmsg -> Import_failed errmsg | Good -> if import_static_files () = false then raise Import_files_failed else (* -> git add; git commit *) begin match exec_commands [ (Printf.sprintf "git add --all %s/" repo_dir); "git add --update"; ] with | Bad errmsg -> Import_failed errmsg | Good -> begin if needs_branch then begin match list_of_pipe (Printf.sprintf "git checkout -b %s" branch) with | Some _ -> () | None -> raise (Git_branch_failed branch) end; match list_of_pipe (Printf.sprintf "git commit --message=\"%s\"" (format_time ts)) with | Some _ -> gc (); Imported branch | None -> raise (Git_commit_failed branch) end end end let push br = match list_of_pipe ("git checkout " ^ br) with | None -> raise (Git_checkout_failed br) | Some _ -> begin match list_of_pipe ("git push mirror " ^ br) with | None -> Bad "git push failed" | Some _ -> Good end end (****************************************************************************** * preparation *****************************************************************************) exception Bad_file of string let ensure_dir d = let open Unix in try let stat = stat d in match stat.st_kind with | S_DIR -> () | _ -> raise (Bad_file "“%d” is not a directory.") with | Unix_error (err, _, _) when err = ENOENT -> msg ("Creating directory " ^ d); FileUtil.mkdir ~parent:true d let ensure_dst_repo d = if not (Git.is_repo ()) then if Git.init () then `New else `Fail else begin msg ("Found existing repository at " ^ d); `Exists end let copy_static_files () = List.iter (fun (src, _) -> let dst = Filename.concat repo_root src in if file_exists dst then msg ~lev:1 (Printf.sprintf "File %s exists at %s, skipping." src dst) else begin msg ~lev:1 (Printf.sprintf "Install %s exists to %s" src dst); match list_of_pipe (Printf.sprintf "cp -f %s %s" src dst) with | None -> raise (Prepare_failed (Printf.sprintf "Error preparing file %s." src)) | Some _ -> () end) static_files let prepare_dst () = try ensure_dir repo_dir; copy_static_files (); ensure_dst_repo repo_dir with | _ -> `Fail (****************************************************************************** * source download *****************************************************************************) let user_agent = Printf.sprintf "Mirror Bot operated by %s <%s>" bot_owner owner_contact let grab url writer = let open Curl in let c = init () in set_url c url; setopt c (CURLOPT_USERAGENT user_agent); set_followlocation c true; set_writefunction c writer; perform c; cleanup c let grab_string url = let buf = Buffer.create 16 in grab url (fun str -> Buffer.add_string buf str; String.length str); Buffer.contents buf let grab_file url file = let chan = open_out file in grab url (fun str -> output_string chan str; String.length str); close_out chan let mk_tmp_webpage name = Filename.concat repo_root (Printf.sprintf "context_download_%s.tmp.html" name) let extract_timestamp src = let tmp = mk_tmp_webpage src.name in grab_file src.page tmp; let input = open_in tmp in let xi = Xmlm.make_input (`Channel input) in let is_candidate name attrs = if name <> "div" then false else begin try let _ = List.find (fun ((_, name), str) -> name = "class" && str = "more-room") attrs in true with | Not_found -> false end in let rec pull xi depth = (*Printf.printf ">> depth %d\n" depth;*) match Xmlm.input xi with | `El_start ((_, name), attrs) when is_candidate name attrs -> begin match Xmlm.peek xi with | `Data str when is_timestamp str -> Some str | _ -> pull xi (depth + 1) end | `El_start _ -> pull xi (depth + 1) | `El_end -> if depth = 1 then None else pull xi (depth - 1) | `Data _ -> pull xi depth | `Dtd _ -> pull xi depth in let res = pull xi 0 in close_in input; (*if not (Xmlm.eoi xi) then*) (*invalid_arg (Printf.sprintf "document %s not well-formed" src.url)*) match res with | None -> invalid_arg (Printf.sprintf "%s does not contain a date string" src.url) | Some dstr -> tm_of_timestamp dstr let date_changed ts src = let repo_ts = extract_timestamp src in msg (Printf.sprintf "ts: %s; repo_ts: %s -> %s" (format_time ts) (format_time repo_ts) (if ts <> repo_ts then "changed" else "unchanged")); if ts <> repo_ts then Changed repo_ts else Unchanged let download_zipball src = let _ = Uri.of_string src.url in let dst = Filename.concat repo_root (src.name ^ ".zip") in if file_exists dst then begin msg (Printf.sprintf "Destination file %s exists, removing." dst); Unix.unlink dst end; msg (Printf.sprintf "Downloading source archive %s" src.url); grab_file src.url dst; dst let epoch = 0. let retrieve_sources sources = let aux src = let branch = src.name in let local_ts = try Git.last_commit branch with Git.Git_checkout_failed _ -> epoch in match date_changed local_ts src with | Changed upstream_ts -> New_zip { branch = branch; ts = upstream_ts; file = download_zipball src; dst = Filename.concat repo_root src.name } | Unchanged -> Stagnation in List.map aux sources let handle_zipball = function | Stagnation -> Extract_ignored | New_zip download -> begin msg (Printf.sprintf "Extracting %s to %s" download.file download.dst); match unzip download.file download.dst with | Some _ -> Path download | None -> Extract_failed download.branch end (****************************************************************************** * arg handling *****************************************************************************) type job = { mutable kind : job_kind; } and job_kind = Status (* output status of local repo *) | Check (* check source of updates *) | Update (* update local repo from sources *) | Prepare (* init local repo *) | Download (* download and import upstream zips *) | Clean (* remove unwanted side-effects *) | Push (* push current state of repository *) | Files (* write SSH wrapper and git config *) | Nil (* no-op *) let usage = Printf.sprintf "Usage: %s [ --check | --status | --update ] " Sys.argv.(0) let parse_argv () = let j : job = { kind = Nil; } in Arg.parse [ ("--check", Arg.Unit (fun () -> j.kind <- Check; ()), "check source timestamps"); ("--status", Arg.Unit (fun () -> j.kind <- Status; ()), "print status of local repo"); ("--update", Arg.Unit (fun () -> j.kind <- Update; ()), "update from sources"); ("--prepare", Arg.Unit (fun () -> j.kind <- Prepare; ()), "prepare local repository"); ("--download", Arg.Unit (fun () -> j.kind <- Download; ()), "download source packages"); ("--clean", Arg.Unit (fun () -> j.kind <- Clean; ()), "remove leftover files"); ("--push", Arg.Unit (fun () -> j.kind <- Push; ()), "push to mirror"); ("--files", Arg.Unit (fun () -> j.kind <- Files; ()), "install config and wrapper files"); ("--verbose", Arg.Unit (fun () -> set_verbosity 1; ()), "enable verbose output"); ] (fun a -> print_endline ("unexpected argument: "^a); exit 0) usage; j (****************************************************************************** * job dispatch *****************************************************************************) let run_status () = if not (dir_exists repo_dir) then msg ("No directory at " ^ repo_dir) else begin match list_of_pipe "git branch --all" with | None -> msg ("No git repository at " ^ repo_dir) | Some all_branches -> let known = List.fold_left (fun acc line -> let l = String.length line in if l < 3 then acc else begin let br = String.sub line 2 (l - 2) in if source_known br sources then br :: acc else acc end) [] all_branches in List.iter (fun br -> let tm = Git.last_commit br in msg (Printf.sprintf "Found branch %7s with timestamp %s." br (format_time tm))) known end let run_check () = forall_sources (fun src -> let local_ts = Git.last_commit src.name in let upstream_ts = extract_timestamp src in msg (Printf.sprintf "%s:" src.name); msg (Printf.sprintf " local: %s" (format_time local_ts)); msg (Printf.sprintf " upstream: %s%s" (format_time upstream_ts) (if local_ts <> upstream_ts then " (new)" else ""))) let run_prepare () = ensure_dir repo_dir; copy_static_files (); match ensure_dst_repo repo_dir with | `New -> msg (Printf.sprintf "Created new local repository at %s." repo_dir) | `Exists -> msg (Printf.sprintf "Found local repository at %s,%s" repo_dir (List.fold_left (fun acc src -> (acc ^ Printf.sprintf " %s: %s" src.name (format_time (Git.last_commit src.name)))) "" sources)) | `Fail -> raise (Bad_env "Failed to initialize destination repository.") let run_download () = let zipballs = retrieve_sources sources in let extracted = List.map handle_zipball zipballs in ignore (List.map Git.import extracted) let run_update () = let zipballs = match prepare_dst () with | `New | `Exists -> retrieve_sources sources | `Fail -> raise (Bad_env "Failed to initialize destination repository.") in let extracted = List.map handle_zipball zipballs in let imported = List.map Git.import extracted in let _ = List.iter (fun st -> match st with | Import_ignored | Import_failed _ -> () | Imported br -> msg (Printf.sprintf "Pushing %s to mirror ..." br); match Git.push br with | Good -> msg "Success!" | Bad errmsg -> err (Printf.sprintf "Failed to push %s: “%s”" br errmsg)) imported in () let run_cleanup () = forall_sources (fun src -> let name = src.name in msg (Printf.sprintf "Removing %s ->" name); List.iter (fun p -> msg (Printf.sprintf " %s" p); ignore (list_of_pipe ("rm -rf -- " ^ p))) [ mk_tmp_webpage name; Filename.concat repo_root name; (Filename.concat repo_root name) ^ ".zip" ]) let run_push () = forall_sources (fun src -> let name = src.name in msg (Printf.sprintf "Pushing branch %s to mirror ..." name); match Git.push name with | Good -> msg "Success!" | Bad errmsg -> err (Printf.sprintf "Failed to push %s: “%s”" name errmsg)) let run_files () = Git.install_files () let print_kind = function | Status -> "status" | Check -> "check" | Update -> "update" | Prepare -> "prepare" | Download -> "download" | Clean -> "cleanup" | Push -> "push" | Files -> "files" | Nil -> "()" let dispatch_job j = log_info (Printf.sprintf "started in %s mode at %s." (print_kind j.kind) (format_time t_start)); match j.kind with | Status -> run_status () | Check -> run_check () | Update -> run_update () | Prepare -> run_prepare () | Download -> run_download () | Clean -> run_cleanup () | Push -> run_push () | Files -> run_files () | Nil -> () (****************************************************************************** * main *****************************************************************************) let () = let job = parse_argv () in Git.set_path repo_dir; dispatch_job job; log_info (Printf.sprintf "mirror bot finished in %.2f ms." (Unix.gettimeofday () -. t_start)); ;;