(******************************************************************************
*
* context-mirror-bot -- Grab the official Context source code from the
* official release and import it into a git repository that is hosted
* online.
* Copyright (C) 2014 Philipp Gesang
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*
******************************************************************************
*
* Contact: Philipp Gesang
* Bitbucket: https://bitbucket.org/phg/context-mirror
*
*****************************************************************************)
let t_start = Unix.gettimeofday ()
type context_src = {
name : string;
url : string;
page : string;
}
type download = {
branch : string;
ts : float;
file : string;
dst : string;
}
type source_status = Stagnation | New_zip of download
type extract_status = Extract_ignored | Extract_failed of string | Path of download
type import_status = Import_ignored | Import_failed of string | Imported of string
type timestamp_status = Changed of float | Unchanged
type status = Good | Bad of errmsg
and errmsg = string
exception Bad_env of string
exception Prepare_failed of string
let bootstrap_repo = "git://repo.or.cz/context.git" (* Marius’s mirror *)
let garden_repo = "http://git.contextgarden.net/context/context.git" (* official, but broken *)
let mirror_repo = "git@bitbucket.org:phg/context-mirror.git" (* our mirror *)
let sources =
[ { name = "beta";
page = "http://www.pragma-ade.com/download-2.htm";
url = "http://www.pragma-ade.com/context/beta/cont-tmf.zip"; };
{ name = "current";
page = "http://www.pragma-ade.com/download-1.htm";
url = "http://www.pragma-ade.com/context/current/cont-tmf.zip"; } ]
(* owner and contact details for the user agent string *)
let bot_owner = "Philipp Gesang"
let owner_contact = "phg42.2a@gmail.com"
let repo_root = "/home/phg/src/mirror"
let repo_subdir = "context-git"
let repo_dir = Filename.concat repo_root repo_subdir
let git_dir = Filename.concat repo_dir ".git"
let git_config_file = Filename.concat git_dir "config"
let ssh_wrapper = Filename.concat repo_root "git-ssh-wrapper.sh"
let ssh_id = "context_mirror_bot_id_rsa"
let ssh_id_dst = (Filename.concat repo_root ssh_id)
let static_files = [
("context-license.txt", "COPYING");
("context-readme.rst", "README.rst")
]
let gitconfig = Printf.sprintf "\
[core]
repositoryformatversion = 0
filemode = true
bare = false
logallrefupdates = true
[push]
default = matching
[user]
email = phg42.2a@gmail.com
name = Context Git Mirror Bot
[bitbucket]
user = context-mirror-bot
token =
[remote \"initial-source\"]
url = %s
fetch = +refs/heads/*:refs/remotes/initial-source/*
[branch \"origin\"]
remote = initial-source
merge = refs/heads/origin
[remote \"garden\"]
url = %s
fetch = +refs/heads/*:refs/remotes/garden/*
[branch \"official\"]
remote = garden
merge = refs/heads/origin
[remote \"mirror\"]
url = %s
fetch = +refs/heads/*:refs/remotes/mirror/*
[branch \"beta\"]
remote = mirror
merge = refs/heads/beta
[branch \"current\"]
remote = mirror
merge = refs/heads/current
" bootstrap_repo garden_repo mirror_repo
let ssh_wrapper_script = Printf.sprintf "\
#!/bin/sh
##
## This file was crated automatically. Changes will be overwritten
## with the next invocation of the mirror bot.
##
keyfile=\"%s\"
if test ! -f \"${keyfile}\"; then
echo \"[Git SSH Wrapper: ERROR] No SSH key file at ${keyfile}.\"
exit 1
fi
ssh -i \"${keyfile}\" $@
" ssh_id_dst
(******************************************************************************
* auxiliary functions
*****************************************************************************)
let format_time t =
let open Unix in
let ut = localtime t in
Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d"
(ut.tm_year + 1900) (ut.tm_mon + 1) ut.tm_mday
ut.tm_hour ut.tm_min ut.tm_sec
let timestamp () =
format_time (Unix.gettimeofday ())
let verbosity = ref 0
let set_verbosity n = verbosity := n
let msg ?lev:(lev=0) str =
if !verbosity >= lev then
Printf.printf "[bot %s] %s\n%!" (timestamp ()) str
let err str =
Printf.eprintf "[bot %s: error] %s\n%!" (timestamp ()) str
let logger = Syslog.openlog Sys.argv.(0);;
let log_info msg =
Syslog.syslog logger `LOG_INFO msg
let log_debug msg =
Syslog.syslog logger `LOG_DEBUG msg
let dir_exists path =
try
let dh = Unix.opendir path in
Unix.closedir dh;
true
with
| Unix.Unix_error _ -> false
let file_exists path =
try
let fd = Unix.openfile path [ Unix.O_RDONLY ] 0 in
Unix.close fd;
true
with
| Unix.Unix_error _ -> false
(**
* The git mirror uses periods to separate dates whereas the Context website
* uses dashes:
*
* Marius: 2014.02.13 11:27
* Pragma: 2014-02-14 17:07
*
* We’re going to use dashes to for some extra consistency. Besides, that
* period-separated date format is weird to begin with.
*)
let re_date_find = Str.regexp "[0-9][0-9][0-9][0-9][.-][0-1][0-9][.-][0-3][0-9] [0-2][0-9]:[0-6][0-9]"
let re_date_match = Str.regexp "\\([0-9][0-9][0-9][0-9]\\)[.-]\\([0-1][0-9]\\)[.-]\\([0-3][0-9]\\) \\([0-2][0-9]\\):\\([0-6][0-9]\\)"
let is_timestamp str =
try
ignore (Str.search_forward re_date_find str 0);
true
with
| Not_found -> false
let tm_of_timestamp ?pos:(pos=0) ts =
(* this is dirty *)
assert (Str.string_match re_date_match ts pos);
let open Unix in
let (t, _) = mktime {
tm_sec = 0;
tm_min = int_of_string (Str.matched_group 5 ts);
tm_hour = int_of_string (Str.matched_group 4 ts);
tm_mday = int_of_string (Str.matched_group 3 ts);
tm_mon = int_of_string (Str.matched_group 2 ts) - 1;
tm_year = int_of_string (Str.matched_group 1 ts) - 1900;
tm_wday = -1;
tm_yday = -1;
tm_isdst = true;
}
in
t
let list_of_pipe cmd =
let open Unix in
(* We’re only gonna read from the command’s stdout but
* keep the stderr around so as to prevent error messages
* from flooding the terminal.
*)
msg ~lev:1 (Printf.sprintf "exec “%s”" cmd);
let (so, si, se) = open_process_full cmd (environment ()) in
let rec aux acc =
try
aux ((input_line so) :: acc)
with
| End_of_file ->
let stat = close_process_full (so, si, se) in
(stat, List.rev acc)
in
match aux [] with
| (WEXITED code, l) when code = 0 -> Some l
| (WEXITED _, _) | (WSIGNALED _, _) | (WSTOPPED _, _) -> None
let exec_commands cmds =
let rec aux = function
| [] -> Good
| cmd :: cmds ->
begin
match list_of_pipe cmd with
| Some _ -> aux cmds
| None -> Bad (Printf.sprintf "Failed to execute command “%s”." cmd)
end
in
aux cmds
let unzip fname dst = list_of_pipe (Printf.sprintf "unzip -o -d %s %s" dst fname)
let source_known name srcs =
try
ignore (List.find (fun src -> src.name = name) srcs);
true
with
| Not_found -> false
let forall_sources f = (* cause side effects for sources *)
List.iter f sources
(******************************************************************************
* git integration
*****************************************************************************)
module Git : sig
val init : unit -> bool
val set_path : string -> unit
val is_repo : unit -> bool
val last_commit : string -> float
val import : extract_status -> import_status
val push : string -> status
val install_files : unit -> unit
exception Git_checkout_failed of string
end = struct
exception Git_init_failed
exception Git_add_failed
exception Git_log_failed
exception Git_checkout_failed of string
exception Git_branch_failed of string
exception Git_commit_failed of string
exception Import_files_failed
exception Copy_ssh_key_failed
let write_config () =
msg ~lev:1 ("Write git configuration to " ^ git_config_file);
let oc = open_out_gen [Open_wronly; Open_creat] 0o600 git_config_file in
output_string oc gitconfig;
close_out oc
let install_ssh_wrapper () =
match list_of_pipe (Printf.sprintf "cp -f %s %s" ssh_id ssh_id_dst) with
| None -> raise Copy_ssh_key_failed
| Some _ ->
begin
let oc = open_out_gen [Open_wronly; Open_creat] 0o700 ssh_wrapper in
output_string oc ssh_wrapper_script;
close_out oc
end
let install_files () = write_config (); install_ssh_wrapper ()
let import_static_files () =
let rec aux = function
| [] -> true
| (src, dst) :: tl ->
begin
let fullsrc = Filename.concat repo_root src in
let fulldst = Filename.concat repo_dir dst in
match list_of_pipe (Printf.sprintf "cp -f %s %s" fullsrc fulldst) with
| None -> false
| Some _ -> aux tl
end
in
aux static_files
let init () =
msg ("Initializing new repository with state from " ^ bootstrap_repo);
match exec_commands [
"git init";
"git remote add initial-source " ^ bootstrap_repo;
"git pull initial-source origin";
"git checkout origin";
] with
| Bad _ -> raise Git_init_failed
| Good -> install_files (); true
let last_commit br =
match list_of_pipe ("git checkout " ^ br) with
| None -> raise (Git_checkout_failed br)
| Some _ ->
begin
match list_of_pipe "git log --max-count=1 --format=%B" with
| None | Some [] -> raise Git_log_failed
| Some (hd :: _) ->
let pos = try Str.search_forward re_date_find hd 0
with Not_found ->
err (Printf.sprintf "Timestamp %s of branch %s is not well formed." hd br);
raise Git_log_failed
in
tm_of_timestamp ~pos:pos hd
end
let set_path path =
msg ~lev:1 (Printf.sprintf "GIT_WORK_TREE=%s" path);
msg ~lev:1 (Printf.sprintf "GIT_DIR=%s" git_dir);
msg ~lev:1 (Printf.sprintf "GIT_SSH=%s" ssh_wrapper);
Unix.putenv "GIT_WORK_TREE" path;
Unix.putenv "GIT_DIR" git_dir;
Unix.putenv "GIT_SSH" ssh_wrapper
let is_repo () =
match list_of_pipe "git status" with
| Some _ -> true
| None -> false
let gc () =
msg ~lev:1 "Running git garbage collector.";
ignore (list_of_pipe "git gc")
let import status =
match status with
| Extract_ignored | Extract_failed _ -> Import_ignored
| Path { branch = branch; ts = ts; file = _; dst = dst } ->
begin
ignore (list_of_pipe "git checkout origin");
let needs_branch = match list_of_pipe ("git checkout " ^ branch) with
| None -> true
| Some _ -> false
in
msg (Printf.sprintf
"Importing %s into %s branch %s."
dst
(if needs_branch then "new" else "old")
branch);
match exec_commands [
(* remove current contents and replace it with the extracted files *)
(Printf.sprintf "rm -rf -- %s/*" repo_dir);
(Printf.sprintf "mv %s/* %s/" dst repo_dir);
] with
| Bad errmsg -> Import_failed errmsg
| Good ->
if import_static_files () = false then
raise Import_files_failed
else (* -> git add; git commit *)
begin
match exec_commands [
(Printf.sprintf "git add --all %s/" repo_dir);
"git add --update";
] with
| Bad errmsg -> Import_failed errmsg
| Good ->
begin
if needs_branch then
begin
match list_of_pipe (Printf.sprintf "git checkout -b %s" branch) with
| Some _ -> ()
| None -> raise (Git_branch_failed branch)
end;
match list_of_pipe (Printf.sprintf "git commit --message=\"%s\"" (format_time ts)) with
| Some _ -> gc (); Imported branch
| None -> raise (Git_commit_failed branch)
end
end
end
let push br =
match list_of_pipe ("git checkout " ^ br) with
| None -> raise (Git_checkout_failed br)
| Some _ ->
begin
match list_of_pipe ("git push mirror " ^ br) with
| None -> Bad "git push failed"
| Some _ -> Good
end
end
(******************************************************************************
* preparation
*****************************************************************************)
exception Bad_file of string
let ensure_dir d =
let open Unix in
try
let stat = stat d in
match stat.st_kind with
| S_DIR -> ()
| _ -> raise (Bad_file "“%d” is not a directory.")
with
| Unix_error (err, _, _) when err = ENOENT ->
msg ("Creating directory " ^ d);
FileUtil.mkdir ~parent:true d
let ensure_dst_repo d =
if not (Git.is_repo ()) then
if Git.init () then `New else `Fail
else
begin
msg ("Found existing repository at " ^ d);
`Exists
end
let copy_static_files () =
List.iter
(fun (src, _) ->
let dst = Filename.concat repo_root src in
if file_exists dst then
msg ~lev:1 (Printf.sprintf "File %s exists at %s, skipping." src dst)
else
begin
msg ~lev:1 (Printf.sprintf "Install %s exists to %s" src dst);
match list_of_pipe (Printf.sprintf "cp -f %s %s" src dst) with
| None -> raise (Prepare_failed (Printf.sprintf "Error preparing file %s." src))
| Some _ -> ()
end)
static_files
let prepare_dst () =
try
ensure_dir repo_dir;
copy_static_files ();
ensure_dst_repo repo_dir
with
| _ -> `Fail
(******************************************************************************
* source download
*****************************************************************************)
let user_agent = Printf.sprintf
"Mirror Bot operated by %s <%s>"
bot_owner
owner_contact
let grab url writer =
let open Curl in
let c = init () in
set_url c url;
setopt c (CURLOPT_USERAGENT user_agent);
set_followlocation c true;
set_writefunction c writer;
perform c;
cleanup c
let grab_string url =
let buf = Buffer.create 16 in
grab url (fun str -> Buffer.add_string buf str; String.length str);
Buffer.contents buf
let grab_file url file =
let chan = open_out file in
grab url (fun str -> output_string chan str; String.length str);
close_out chan
let mk_tmp_webpage name =
Filename.concat
repo_root
(Printf.sprintf "context_download_%s.tmp.html" name)
let extract_timestamp src =
let tmp = mk_tmp_webpage src.name in
grab_file src.page tmp;
let input = open_in tmp in
let xi = Xmlm.make_input (`Channel input) in
let is_candidate name attrs =
if name <> "div" then
false
else begin
try
let _ = List.find (fun ((_, name), str) -> name = "class" && str = "more-room") attrs in
true
with
| Not_found -> false
end
in
let rec pull xi depth =
(*Printf.printf ">> depth %d\n" depth;*)
match Xmlm.input xi with
| `El_start ((_, name), attrs) when is_candidate name attrs -> begin
match Xmlm.peek xi with
| `Data str when is_timestamp str -> Some str
| _ -> pull xi (depth + 1)
end
| `El_start _ -> pull xi (depth + 1)
| `El_end -> if depth = 1 then None else pull xi (depth - 1)
| `Data _ -> pull xi depth
| `Dtd _ -> pull xi depth
in
let res = pull xi 0 in
close_in input;
(*if not (Xmlm.eoi xi) then*)
(*invalid_arg (Printf.sprintf "document %s not well-formed" src.url)*)
match res with
| None -> invalid_arg (Printf.sprintf "%s does not contain a date string" src.url)
| Some dstr -> tm_of_timestamp dstr
let date_changed ts src =
let repo_ts = extract_timestamp src in
msg (Printf.sprintf "ts: %s; repo_ts: %s -> %s"
(format_time ts)
(format_time repo_ts)
(if ts <> repo_ts then "changed" else "unchanged"));
if ts <> repo_ts then
Changed repo_ts
else
Unchanged
let download_zipball src =
let _ = Uri.of_string src.url in
let dst = Filename.concat repo_root (src.name ^ ".zip") in
if file_exists dst then
begin
msg (Printf.sprintf "Destination file %s exists, removing." dst);
Unix.unlink dst
end;
msg (Printf.sprintf "Downloading source archive %s" src.url);
grab_file src.url dst;
dst
let epoch = 0.
let retrieve_sources sources =
let aux src =
let branch = src.name in
let local_ts = try Git.last_commit branch with Git.Git_checkout_failed _ -> epoch in
match date_changed local_ts src with
| Changed upstream_ts -> New_zip { branch = branch;
ts = upstream_ts;
file = download_zipball src;
dst = Filename.concat repo_root src.name }
| Unchanged -> Stagnation
in
List.map aux sources
let handle_zipball = function
| Stagnation -> Extract_ignored
| New_zip download ->
begin
msg (Printf.sprintf "Extracting %s to %s" download.file download.dst);
match unzip download.file download.dst with
| Some _ -> Path download
| None -> Extract_failed download.branch
end
(******************************************************************************
* arg handling
*****************************************************************************)
type job = {
mutable kind : job_kind;
}
and job_kind = Status (* output status of local repo *)
| Check (* check source of updates *)
| Update (* update local repo from sources *)
| Prepare (* init local repo *)
| Download (* download and import upstream zips *)
| Clean (* remove unwanted side-effects *)
| Push (* push current state of repository *)
| Files (* write SSH wrapper and git config *)
| Nil (* no-op *)
let usage = Printf.sprintf "Usage:
%s [ --check | --status | --update ]
" Sys.argv.(0)
let parse_argv () =
let j : job = {
kind = Nil;
}
in
Arg.parse [
("--check", Arg.Unit (fun () -> j.kind <- Check; ()), "check source timestamps");
("--status", Arg.Unit (fun () -> j.kind <- Status; ()), "print status of local repo");
("--update", Arg.Unit (fun () -> j.kind <- Update; ()), "update from sources");
("--prepare", Arg.Unit (fun () -> j.kind <- Prepare; ()), "prepare local repository");
("--download", Arg.Unit (fun () -> j.kind <- Download; ()), "download source packages");
("--clean", Arg.Unit (fun () -> j.kind <- Clean; ()), "remove leftover files");
("--push", Arg.Unit (fun () -> j.kind <- Push; ()), "push to mirror");
("--files", Arg.Unit (fun () -> j.kind <- Files; ()), "install config and wrapper files");
("--verbose", Arg.Unit (fun () -> set_verbosity 1; ()), "enable verbose output");
] (fun a -> print_endline ("unexpected argument: "^a); exit 0) usage;
j
(******************************************************************************
* job dispatch
*****************************************************************************)
let run_status () =
if not (dir_exists repo_dir) then msg ("No directory at " ^ repo_dir) else
begin
match list_of_pipe "git branch --all" with
| None -> msg ("No git repository at " ^ repo_dir)
| Some all_branches ->
let known = List.fold_left
(fun acc line ->
let l = String.length line in
if l < 3 then acc else
begin
let br = String.sub line 2 (l - 2) in
if source_known br sources then
br :: acc
else
acc
end) [] all_branches
in
List.iter
(fun br ->
let tm = Git.last_commit br in
msg (Printf.sprintf
"Found branch %7s with timestamp %s."
br
(format_time tm)))
known
end
let run_check () =
forall_sources
(fun src ->
let local_ts = Git.last_commit src.name in
let upstream_ts = extract_timestamp src in
msg (Printf.sprintf "%s:" src.name);
msg (Printf.sprintf " local: %s" (format_time local_ts));
msg (Printf.sprintf " upstream: %s%s"
(format_time upstream_ts)
(if local_ts <> upstream_ts then " (new)" else "")))
let run_prepare () =
ensure_dir repo_dir;
copy_static_files ();
match ensure_dst_repo repo_dir with
| `New -> msg (Printf.sprintf "Created new local repository at %s." repo_dir)
| `Exists -> msg (Printf.sprintf
"Found local repository at %s,%s"
repo_dir
(List.fold_left
(fun acc src -> (acc ^ Printf.sprintf
" %s: %s"
src.name
(format_time (Git.last_commit src.name))))
"" sources))
| `Fail -> raise (Bad_env "Failed to initialize destination repository.")
let run_download () =
let zipballs = retrieve_sources sources in
let extracted = List.map handle_zipball zipballs in
ignore (List.map Git.import extracted)
let run_update () =
let zipballs = match prepare_dst () with
| `New | `Exists -> retrieve_sources sources
| `Fail -> raise (Bad_env "Failed to initialize destination repository.")
in
let extracted = List.map handle_zipball zipballs in
let imported = List.map Git.import extracted in
let _ = List.iter
(fun st ->
match st with
| Import_ignored | Import_failed _ -> ()
| Imported br ->
msg (Printf.sprintf "Pushing %s to mirror ..." br);
match Git.push br with
| Good -> msg "Success!"
| Bad errmsg ->
err (Printf.sprintf "Failed to push %s: “%s”" br errmsg))
imported
in
()
let run_cleanup () =
forall_sources
(fun src ->
let name = src.name in
msg (Printf.sprintf "Removing %s ->" name);
List.iter (fun p ->
msg (Printf.sprintf " %s" p);
ignore (list_of_pipe ("rm -rf -- " ^ p)))
[ mk_tmp_webpage name;
Filename.concat repo_root name;
(Filename.concat repo_root name) ^ ".zip" ])
let run_push () =
forall_sources
(fun src ->
let name = src.name in
msg (Printf.sprintf "Pushing branch %s to mirror ..." name);
match Git.push name with
| Good -> msg "Success!"
| Bad errmsg ->
err (Printf.sprintf "Failed to push %s: “%s”" name errmsg))
let run_files () = Git.install_files ()
let print_kind = function
| Status -> "status"
| Check -> "check"
| Update -> "update"
| Prepare -> "prepare"
| Download -> "download"
| Clean -> "cleanup"
| Push -> "push"
| Files -> "files"
| Nil -> "()"
let dispatch_job j =
log_info (Printf.sprintf
"started in %s mode at %s."
(print_kind j.kind)
(format_time t_start));
match j.kind with
| Status -> run_status ()
| Check -> run_check ()
| Update -> run_update ()
| Prepare -> run_prepare ()
| Download -> run_download ()
| Clean -> run_cleanup ()
| Push -> run_push ()
| Files -> run_files ()
| Nil -> ()
(******************************************************************************
* main
*****************************************************************************)
let () =
let job = parse_argv () in
Git.set_path repo_dir;
dispatch_job job;
log_info (Printf.sprintf
"mirror bot finished in %.2f ms."
(Unix.gettimeofday () -. t_start));
;;