diff options
author | Philipp Gesang <phg42.2a@gmail.com> | 2014-05-16 20:32:17 +0200 |
---|---|---|
committer | Philipp Gesang <phg42.2a@gmail.com> | 2014-05-16 20:32:17 +0200 |
commit | 94aa3d0f8c4c480ad67045f12d33b0f7813868e6 (patch) | |
tree | aca2510cac066d9947809e4a7b13efa7a77772eb | |
download | context-mirror-bot-94aa3d0f8c4c480ad67045f12d33b0f7813868e6.tar.gz |
initial
-rw-r--r-- | OMakefile | 59 | ||||
-rw-r--r-- | OMakeroot | 12 | ||||
-rw-r--r-- | context_mirror_bot.ml | 722 |
3 files changed, 793 insertions, 0 deletions
diff --git a/OMakefile b/OMakefile new file mode 100644 index 0000000..602ea95 --- /dev/null +++ b/OMakefile @@ -0,0 +1,59 @@ +# .PHONY: all install clean + +USE_OCAMLFIND = true + +OCAMLPACKS[] = curl uri xmlm unix fileutils str + #core + #async + #uri + +# +# if $(not $(OCAMLFIND_EXISTS)) +# eprintln(This project requires ocamlfind, but is was not found.) +# eprintln(You need to install ocamlfind and run "omake --configure".) +# exit 1 + +# +# Include path +# +# OCAMLINCLUDES += + +# +# Compile native or byte code? +# +# The default values are defined as follows: +# +NATIVE_ENABLED = false +BYTE_ENABLED = true + +# +# Various options + +#OCAMLFLAGS += +OCAMLCFLAGS += -g -thread +OCAMLOPTFLAGS += -g -thread + +# OCAML_LINK_FLAGS += +# OCAML_BYTE_LINK_FLAGS += +# OCAML_NATIVE_LINK_FLAGS += + +################################################ +# Generated files +# +# Workaround for the fact that ocamldep does not pay attention to .mll +# and .mly files. +# +# OCamlGeneratedFiles(parser.ml lexer.ml) + +FILES[] = + context_mirror_bot + +PROGRAM = context_mirror_bot +#OCAML_LIBS += core async + +#OCAML_CLIBS += +#OCAML_OTHER_LIBS += +#OCAML_LIB_FLAGS += + +.DEFAULT: $(OCamlProgram $(PROGRAM), $(FILES)) + diff --git a/OMakeroot b/OMakeroot new file mode 100644 index 0000000..be8d4b6 --- /dev/null +++ b/OMakeroot @@ -0,0 +1,12 @@ +open build/OCaml + +# +# The command-line variables are defined *after* the +# standard configuration has been loaded. +# +#DefineCommandVars() + +# +# Include the OMakefile in this directory. +# +.SUBDIRS: . diff --git a/context_mirror_bot.ml b/context_mirror_bot.ml new file mode 100644 index 0000000..529e05a --- /dev/null +++ b/context_mirror_bot.ml @@ -0,0 +1,722 @@ +(****************************************************************************** + * + * context-mirror-bot -- Grab the official Context source code from the + * official release and import it into a git repository that is hosted + * online. + * Copyright (C) 2014 Philipp Gesang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ****************************************************************************** + * + * Contact: Philipp Gesang <phg42.2a@gmail.com> + * Bitbucket: https://bitbucket.org/phg/context-mirror + * + ****************************************************************************** + * + * TODO + * * readme / license + * * curl user agent + * * logging + * + *****************************************************************************) + +type context_src = { + name : string; + url : string; + page : string; +} + +type download = { + branch : string; + ts : float; + file : string; + dst : string; +} + +type source_status = Stagnation | New_zip of download + +type extract_status = Extract_ignored | Extract_failed of string | Path of download + +type import_status = Import_ignored | Import_failed of string | Imported of string + +type timestamp_status = Changed of float | Unchanged + +type status = Good | Bad of errmsg +and errmsg = string + +(*type environment_status = New | Last of Unix.tm | Fail*) +exception Bad_env of string + +let bootstrap_repo = "git://repo.or.cz/context.git" (* Marius’s mirror *) +let garden_repo = "http://git.contextgarden.net/context/context.git" (* official, but broken *) +let mirror_repo = "git@bitbucket.org:phg/context-mirror.git" (* our mirror *) + +let sources = + [ { name = "beta"; + page = "http://www.pragma-ade.com/download-2.htm"; + url = "http://www.pragma-ade.com/context/beta/cont-tmf.zip"; }; + { name = "current"; + page = "http://www.pragma-ade.com/download-1.htm"; + url = "http://www.pragma-ade.com/context/current/cont-tmf.zip"; } ] + +let repo_root = "/home/phg/src/mirror" +let repo_subdir = "context-git" +let repo_dir = Filename.concat repo_root repo_subdir +let git_dir = Filename.concat repo_dir ".git" +let git_config_file = Filename.concat git_dir "config" +let ssh_wrapper = Filename.concat repo_root "git-ssh-wrapper.sh" +let ssh_id = "context_mirror_bot_id_rsa" +let ssh_id_dst = (Filename.concat repo_root ssh_id) + +let gitconfig = Printf.sprintf "\ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true + +[push] + default = matching + +[user] + email = phg42.2a@gmail.com + name = Context Git Mirror Bot + +[bitbucket] + user = context-mirror-bot + token = + +[remote \"initial-source\"] + url = %s + fetch = +refs/heads/*:refs/remotes/initial-source/* + +[branch \"origin\"] + remote = initial-source + merge = refs/heads/origin + +[remote \"garden\"] + url = %s + fetch = +refs/heads/*:refs/remotes/garden/* + +[branch \"official\"] + remote = garden + merge = refs/heads/origin + +[remote \"mirror\"] + url = %s + fetch = +refs/heads/*:refs/remotes/mirror/* + +[branch \"beta\"] + remote = mirror + merge = refs/heads/beta + +[branch \"current\"] + remote = mirror + merge = refs/heads/current +" bootstrap_repo garden_repo mirror_repo + +let ssh_wrapper_script = Printf.sprintf "\ +#!/bin/sh +## +## This file was crated automatically. Changes will be overwritten +## with the next invocation of the mirror bot. +## + +keyfile=\"%s\" +if test ! -f \"${keyfile}\"; then + echo \"[Git SSH Wrapper: ERROR] No SSH key file at ${keyfile}.\" + exit 1 +fi +ssh -i \"${keyfile}\" $@ + +" ssh_id_dst + +(****************************************************************************** + * auxiliary functions + *****************************************************************************) + +let format_time t = + let open Unix in + let ut = localtime t in + Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d" + (ut.tm_year + 1900) (ut.tm_mon + 1) ut.tm_mday + ut.tm_hour ut.tm_min ut.tm_sec + +let timestamp () = + format_time (Unix.gettimeofday ()) + +let verbosity = ref 0 + +let set_verbosity n = verbosity := n + +let msg ?lev:(lev=0) str = + if !verbosity >= lev then + Printf.printf "[bot %s] %s\n%!" (timestamp ()) str + +let err str = + Printf.eprintf "[bot %s: error] %s\n%!" (timestamp ()) str + +let dir_exists path = + try + let dh = Unix.opendir path in + Unix.closedir dh; + true + with + | Unix.Unix_error _ -> false + +let file_exists path = + try + let fd = Unix.openfile path [ Unix.O_RDONLY ] 0 in + Unix.close fd; + true + with + | Unix.Unix_error _ -> false + +(** + * The git mirror uses periods to separate dates whereas the Context website + * uses dashes: + * + * Marius: 2014.02.13 11:27 + * Pragma: 2014-02-14 17:07 + * + * We’re going to use dashes to for some extra consistency. Besides, that + * period-separated date format is weird to begin with. + *) +let re_date_find = Str.regexp "[0-9][0-9][0-9][0-9][.-][0-1][0-9][.-][0-3][0-9] [0-2][0-9]:[0-6][0-9]" +let re_date_match = Str.regexp "\\([0-9][0-9][0-9][0-9]\\)[.-]\\([0-1][0-9]\\)[.-]\\([0-3][0-9]\\) \\([0-2][0-9]\\):\\([0-6][0-9]\\)" + +let is_timestamp str = + try + ignore (Str.search_forward re_date_find str 0); + true + with + | Not_found -> false + +let tm_of_timestamp ?pos:(pos=0) ts = + (* this is dirty *) + assert (Str.string_match re_date_match ts pos); + let open Unix in + let (t, _) = mktime { + tm_sec = 0; + tm_min = int_of_string (Str.matched_group 5 ts); + tm_hour = int_of_string (Str.matched_group 4 ts); + tm_mday = int_of_string (Str.matched_group 3 ts); + tm_mon = int_of_string (Str.matched_group 2 ts) - 1; + tm_year = int_of_string (Str.matched_group 1 ts) - 1900; + tm_wday = -1; + tm_yday = -1; + tm_isdst = true; + } + in + t + +let list_of_pipe cmd = + let open Unix in + (* We’re only gonna read from the command’s stdout but + * keep the stderr around so as to prevent error messages + * from flooding the terminal. + *) + msg ~lev:1 (Printf.sprintf "exec “%s”" cmd); + let (so, si, se) = open_process_full cmd (environment ()) in + let rec aux acc = + try + aux ((input_line so) :: acc) + with + | End_of_file -> + let stat = close_process_full (so, si, se) in + (stat, List.rev acc) + in + match aux [] with + | (WEXITED code, l) when code = 0 -> Some l + | (WEXITED _, _) | (WSIGNALED _, _) | (WSTOPPED _, _) -> None + +let exec_commands cmds = + let rec aux = function + | [] -> Good + | cmd :: cmds -> + begin + match list_of_pipe cmd with + | Some _ -> aux cmds + | None -> Bad (Printf.sprintf "Failed to execute command “%s”." cmd) + end + in + aux cmds + +let unzip fname dst = list_of_pipe (Printf.sprintf "unzip -o -d %s %s" dst fname) + +let source_known name srcs = + try + ignore (List.find (fun src -> src.name = name) srcs); + true + with + | Not_found -> false + +let forall_sources f = (* cause side effects for sources *) + List.iter f sources + +(****************************************************************************** + * git integration + *****************************************************************************) + +module Git : sig + val init : unit -> bool + val set_path : string -> unit + val is_repo : unit -> bool + val last_commit : string -> float + val import : extract_status -> import_status + val push : string -> status + val install_files : unit -> unit + exception Git_checkout_failed of string +end = struct + exception Git_init_failed + exception Git_add_failed + exception Git_log_failed + exception Git_checkout_failed of string + exception Git_branch_failed of string + exception Git_commit_failed of string + exception Copy_ssh_key_failed + + let write_config () = + msg ~lev:1 ("Write git configuration to " ^ git_config_file); + let oc = open_out_gen [Open_wronly; Open_creat] 0o600 git_config_file in + output_string oc gitconfig; + close_out oc + + let install_ssh_wrapper () = + match list_of_pipe (Printf.sprintf "cp -f %s %s" ssh_id ssh_id_dst) with + | None -> raise Copy_ssh_key_failed + | Some _ -> + begin + let oc = open_out_gen [Open_wronly; Open_creat] 0o700 ssh_wrapper in + output_string oc ssh_wrapper_script; + close_out oc + end + + let install_files () = write_config (); install_ssh_wrapper () + + let init () = + msg ("Initializing new repository with state from " ^ bootstrap_repo); + match exec_commands [ + "git init"; + "git remote add initial-source " ^ bootstrap_repo; + "git pull initial-source origin"; + "git checkout origin"; + ] with + | Bad _ -> raise Git_init_failed + | Good -> install_files (); true + + let last_commit br = + match list_of_pipe ("git checkout " ^ br) with + | None -> raise (Git_checkout_failed br) + | Some _ -> + begin + match list_of_pipe "git log --max-count=1 --format=%B" with + | None | Some [] -> raise Git_log_failed + | Some (hd :: _) -> + let pos = try Str.search_forward re_date_find hd 0 + with Not_found -> + err (Printf.sprintf "Timestamp %s of branch %s is not well formed." hd br); + raise Git_log_failed + in + tm_of_timestamp ~pos:pos hd + end + + let set_path path = + msg ~lev:1 (Printf.sprintf "GIT_WORK_TREE=%s" path); + msg ~lev:1 (Printf.sprintf "GIT_DIR=%s" git_dir); + msg ~lev:1 (Printf.sprintf "GIT_SSH=%s" ssh_wrapper); + Unix.putenv "GIT_WORK_TREE" path; + Unix.putenv "GIT_DIR" git_dir; + Unix.putenv "GIT_SSH" ssh_wrapper + + let is_repo () = + match list_of_pipe "git status" with + | Some _ -> true + | None -> false + + let add timestamp = + exec_commands [ "git add ."; + "git add --update"; + Printf.sprintf + "git commit --message=\"%s\"" + (format_time timestamp) ] + + let gc () = + msg ~lev:1 "Running git garbage collector."; + ignore (list_of_pipe "git gc") + + let import status = + match status with + | Extract_ignored | Extract_failed _ -> Import_ignored + | Path { branch = branch; ts = ts; file = _; dst = dst } -> + begin + ignore (list_of_pipe "git checkout origin"); + let needs_branch = match list_of_pipe ("git checkout " ^ branch) with + | None -> true + | Some _ -> false + in + msg (Printf.sprintf + "Importing %s into %s branch %s." + dst + (if needs_branch then "new" else "old") + branch); + match exec_commands [ (Printf.sprintf "rm -rf -- %s/*" repo_dir); + (Printf.sprintf "mv %s/* %s/" dst repo_dir); + (Printf.sprintf "git add --all %s/" repo_dir); + "git add --update"; + ] with + | Bad errmsg -> Import_failed errmsg + | Good -> + begin + if needs_branch then + begin + match list_of_pipe (Printf.sprintf "git checkout -b %s" branch) with + | Some _ -> () + | None -> raise (Git_branch_failed branch) + end; + match list_of_pipe (Printf.sprintf "git commit --message=\"%s\"" (format_time ts)) with + | Some _ -> gc (); Imported branch + | None -> raise (Git_commit_failed branch) + end + end + + let push br = + match list_of_pipe ("git checkout " ^ br) with + | None -> raise (Git_checkout_failed br) + | Some _ -> + begin + match list_of_pipe ("git push mirror " ^ br) with + | None -> Bad "git push failed" + | Some _ -> Good + end +end + +(****************************************************************************** + * preparation + *****************************************************************************) + +exception Bad_file of string + +let ensure_dir d = + let open Unix in + try + let stat = stat d in + match stat.st_kind with + | S_DIR -> () + | _ -> raise (Bad_file "“%d” is not a directory.") + with + | Unix_error (err, _, _) when err = ENOENT -> + msg ("Creating directory " ^ d); + FileUtil.mkdir ~parent:true d + +let ensure_dst_repo d = + if not (Git.is_repo ()) then + if Git.init () then `New else `Fail + else + begin + msg ("Found existing repository at " ^ d); + `Exists + end + +let prepare_dst () = + try + ensure_dir repo_dir; + ensure_dst_repo repo_dir + with + | _ -> `Fail + +(****************************************************************************** + * source download + *****************************************************************************) + +let grab url writer = + let open Curl in + let c = init () in + set_url c url; + set_followlocation c true; + set_writefunction c writer; + perform c; + cleanup c + +let grab_string url = + let buf = Buffer.create 16 in + grab url (fun str -> Buffer.add_string buf str; String.length str); + Buffer.contents buf + +let grab_file url file = + let chan = open_out file in + grab url (fun str -> output_string chan str; String.length str); + close_out chan + +let mk_tmp_webpage name = + Filename.concat + repo_root + (Printf.sprintf "context_download_%s.tmp.html" name) + +let extract_timestamp src = + let tmp = mk_tmp_webpage src.name in + grab_file src.page tmp; + let input = open_in tmp in + let xi = Xmlm.make_input (`Channel input) in + let is_candidate name attrs = + if name <> "div" then + false + else begin + try + let _ = List.find (fun ((_, name), str) -> name = "class" && str = "more-room") attrs in + true + with + | Not_found -> false + end + in + let rec pull xi depth = + (*Printf.printf ">> depth %d\n" depth;*) + match Xmlm.input xi with + | `El_start ((_, name), attrs) when is_candidate name attrs -> begin + match Xmlm.peek xi with + | `Data str when is_timestamp str -> Some str + | _ -> pull xi (depth + 1) + end + | `El_start _ -> pull xi (depth + 1) + | `El_end -> if depth = 1 then None else pull xi (depth - 1) + | `Data _ -> pull xi depth + | `Dtd _ -> pull xi depth + in + let res = pull xi 0 in + close_in input; + (*if not (Xmlm.eoi xi) then*) + (*invalid_arg (Printf.sprintf "document %s not well-formed" src.url)*) + match res with + | None -> invalid_arg (Printf.sprintf "%s does not contain a date string" src.url) + | Some dstr -> tm_of_timestamp dstr + +let date_changed ts src = + let repo_ts = extract_timestamp src in + msg (Printf.sprintf "ts: %s; repo_ts: %s -> %s" + (format_time ts) + (format_time repo_ts) + (if ts <> repo_ts then "changed" else "unchanged")); + if ts <> repo_ts then + Changed repo_ts + else + Unchanged + +let download_zipball src = + let _ = Uri.of_string src.url in + let dst = Filename.concat repo_root (src.name ^ ".zip") in + if file_exists dst then + begin + msg (Printf.sprintf "Destination file %s exists, removing." dst); + Unix.unlink dst + end; + msg (Printf.sprintf "Downloading source archive %s" src.url); + grab_file src.url dst; + dst + +let epoch = 0. + +let retrieve_sources sources = + let aux src = + let branch = src.name in + let local_ts = try Git.last_commit branch with Git.Git_checkout_failed _ -> epoch in + match date_changed local_ts src with + | Changed upstream_ts -> New_zip { branch = branch; + ts = upstream_ts; + file = download_zipball src; + dst = Filename.concat repo_root src.name } + | Unchanged -> Stagnation + in + List.map aux sources + +let handle_zipball = function + | Stagnation -> Extract_ignored + | New_zip download -> + begin + msg (Printf.sprintf "Extracting %s to %s" download.file download.dst); + match unzip download.file download.dst with + | Some _ -> Path download + | None -> Extract_failed download.branch + end + +(****************************************************************************** + * arg handling + *****************************************************************************) + +type job = { + mutable kind : job_kind; +} +and job_kind = Status (* output status of local repo *) + | Check (* check source of updates *) + | Update (* update local repo from sources *) + | Prepare (* init local repo *) + | Download (* download and import upstream zips *) + | Clean (* remove unwanted side-effects *) + | Push (* push current state of repository *) + | Files (* write SSH wrapper and git config *) + | Nil (* no-op *) + +let usage = Printf.sprintf "Usage: + %s [ -check | -status | -update ] +" Sys.argv.(0) + +let parse_argv () = + let j : job = { + kind = Nil; + } + in + Arg.parse [ + ("--check", Arg.Unit (fun () -> j.kind <- Check; ()), "check source timestamps"); + ("--status", Arg.Unit (fun () -> j.kind <- Status; ()), "print status of local repo"); + ("--update", Arg.Unit (fun () -> j.kind <- Update; ()), "update from sources"); + ("--prepare", Arg.Unit (fun () -> j.kind <- Prepare; ()), "prepare local repository"); + ("--download", Arg.Unit (fun () -> j.kind <- Download; ()), "download source packages"); + ("--clean", Arg.Unit (fun () -> j.kind <- Clean; ()), "remove leftover files"); + ("--push", Arg.Unit (fun () -> j.kind <- Push; ()), "push to mirror"); + ("--files", Arg.Unit (fun () -> j.kind <- Files; ()), "install config and wrapper files"); + ("--verbose", Arg.Unit (fun () -> set_verbosity 1; ()), "enable verbose output"); + ] (fun a -> print_endline ("unexpected argument: "^a); exit 0) usage; + j + +(****************************************************************************** + * job dispatch + *****************************************************************************) + +let run_status () = + if not (dir_exists repo_dir) then msg ("No directory at " ^ repo_dir) else + begin + match list_of_pipe "git branch --all" with + | None -> msg ("No git repository at " ^ repo_dir) + | Some all_branches -> + let known = List.fold_left + (fun acc line -> + let l = String.length line in + if l < 3 then acc else + begin + let br = String.sub line 2 (l - 2) in + if source_known br sources then + br :: acc + else + acc + end) [] all_branches + in + List.iter + (fun br -> + let tm = Git.last_commit br in + msg (Printf.sprintf + "Found branch %7s with timestamp %s." + br + (format_time tm))) + known + end + +let run_check () = + forall_sources + (fun src -> + let local_ts = Git.last_commit src.name in + let upstream_ts = extract_timestamp src in + msg (Printf.sprintf "%s:" src.name); + msg (Printf.sprintf " local: %s" (format_time local_ts)); + msg (Printf.sprintf " upstream: %s%s" + (format_time upstream_ts) + (if local_ts <> upstream_ts then " (new)" else ""))) + +let run_prepare () = + ensure_dir repo_dir; + match ensure_dst_repo repo_dir with + | `New -> msg (Printf.sprintf "Created new local repository at %s." repo_dir) + | `Exists -> msg (Printf.sprintf + "Found local repository at %s,%s" + repo_dir + (List.fold_left + (fun acc src -> (acc ^ Printf.sprintf + " %s: %s" + src.name + (format_time (Git.last_commit src.name)))) + "" sources)) + | `Fail -> raise (Bad_env "Failed to initialize destination repository.") + +let run_download () = + let zipballs = retrieve_sources sources in + let extracted = List.map handle_zipball zipballs in + ignore (List.map Git.import extracted) + +let run_update () = + let zipballs = match prepare_dst () with + | `New | `Exists -> retrieve_sources sources + | `Fail -> raise (Bad_env "Failed to initialize destination repository.") + in + let extracted = List.map handle_zipball zipballs in + let imported = List.map Git.import extracted in + let _ = List.iter + (fun st -> + match st with + | Import_ignored | Import_failed _ -> () + | Imported br -> + msg (Printf.sprintf "Pushing %s to mirror ..." br); + match Git.push br with + | Good -> msg "Success!" + | Bad errmsg -> + err (Printf.sprintf "Failed to push %s: “%s”" br errmsg)) + imported + in + () + +let run_cleanup () = + forall_sources + (fun src -> + let name = src.name in + msg (Printf.sprintf "Removing %s ->" name); + List.iter (fun p -> + msg (Printf.sprintf " %s" p); + ignore (list_of_pipe ("rm -rf -- " ^ p))) + [ mk_tmp_webpage name; + Filename.concat repo_root name; + (Filename.concat repo_root name) ^ ".zip" ]) + +let run_push () = + forall_sources + (fun src -> + let name = src.name in + msg (Printf.sprintf "Pushing branch %s to mirror ..." name); + match Git.push name with + | Good -> msg "Success!" + | Bad errmsg -> + err (Printf.sprintf "Failed to push %s: “%s”" name errmsg)) + +let run_files () = Git.install_files () + + +let dispatch_job j = + match j.kind with + | Status -> run_status () + | Check -> run_check () + | Update -> run_update () + | Prepare -> run_prepare () + | Download -> run_download () + | Clean -> run_cleanup () + | Push -> run_push () + | Files -> run_files () + | Nil -> () + +(****************************************************************************** + * main + *****************************************************************************) + +let () = + let job = parse_argv () in + Git.set_path repo_dir; + dispatch_job job +;; + |