#!/usr/bin/env python # ***** BEGIN LICENSE BLOCK ***** # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # ***** END LICENSE BLOCK ***** """vcs_sync.py hg<->git conversions. Needs to support both the monolithic beagle/goanna.git type conversions, as well as many-to-many (l10n, build repos, etc.) """ from copy import deepcopy import mmap import os import pprint import re import sys import time try: import simplejson as json assert json except ImportError: import json sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0]))) import mozharness external_tools_path = os.path.join( os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))), 'external_tools', ) from mozharness.base.errors import HgErrorList, GitErrorList from mozharness.base.log import INFO, ERROR, FATAL from mozharness.base.python import VirtualenvMixin, virtualenv_config_options from mozharness.base.transfer import TransferMixin from mozharness.base.vcs.vcssync import VCSSyncScript from mozharness.mozilla.tooltool import TooltoolMixin # HgGitScript {{{1 class HgGitScript(VirtualenvMixin, TooltoolMixin, TransferMixin, VCSSyncScript): """ Beagle-oriented hg->git script (lots of mozilla-central hardcodes; assumption that we're going to be importing lots of branches). Beagle is a git repo of mozilla-central, with full cvs history, and a number of developer-oriented repositories and branches added. The partner-oriented goanna.git could also be incorporated into this script with some changes. """ mapfile_binary_search = None all_repos = None successful_repos = [] config_options = [ [["--no-check-incoming", ], { "action": "store_false", "dest": "check_incoming", "default": True, "help": "Don't check for incoming changesets", }], [["--max-log-sample-size", ], { "action": "store", "dest": "email_max_log_sample_size", "type": "int", "default": 102400, "help": "Specify the maximum number of characters from a log file to be " "embedded in the email body, per embedding (not total - note we " "embed two separate log samples into the email - so maximum " "email body size can end up a little over 2x this amount).", }], ] def __init__(self, require_config_file=True): super(HgGitScript, self).__init__( config_options=virtualenv_config_options + self.config_options, all_actions=[ 'clobber', 'list-repos', 'create-virtualenv', 'update-stage-mirror', 'update-work-mirror', 'create-git-notes', 'publish-to-mapper', 'push', 'combine-mapfiles', 'upload', 'notify', ], # These default actions are the update loop that we run after the # initial steps to create the work mirror with all the branches + # cvs history have been run. default_actions=[ 'list-repos', 'create-virtualenv', 'update-stage-mirror', 'update-work-mirror', 'push', 'combine-mapfiles', 'upload', 'notify', ], require_config_file=require_config_file ) self.remote_targets = None # Helper methods {{{1 def query_abs_dirs(self): """ Define paths. """ if self.abs_dirs: return self.abs_dirs abs_dirs = super(HgGitScript, self).query_abs_dirs() abs_dirs['abs_cvs_history_dir'] = os.path.join( abs_dirs['abs_work_dir'], 'mozilla-cvs-history') abs_dirs['abs_source_dir'] = os.path.join( abs_dirs['abs_work_dir'], 'stage_source') abs_dirs['abs_repo_sync_tools_dir'] = os.path.join( abs_dirs['abs_work_dir'], 'repo-sync-tools') abs_dirs['abs_git_rewrite_dir'] = os.path.join( abs_dirs['abs_work_dir'], 'mc-git-rewrite') abs_dirs['abs_target_dir'] = os.path.join(abs_dirs['abs_work_dir'], 'target') if 'conversion_dir' in self.config: abs_dirs['abs_conversion_dir'] = os.path.join( abs_dirs['abs_work_dir'], 'conversion', self.config['conversion_dir'] ) self.abs_dirs = abs_dirs return self.abs_dirs def init_git_repo(self, path, additional_args=None, deny_deletes=False): """ Create a git repo, with retries. We call this with additional_args=['--bare'] to save disk + make things cleaner. """ git = self.query_exe("git", return_type="list") cmd = git + ['init'] # generally for --bare if additional_args: cmd.extend(additional_args) cmd.append(path) status = self.retry( self.run_command, args=(cmd, ), error_level=FATAL, error_message="Can't set up %s!" % path ) status = self.run_command( git + ['config', 'receive.denyNonFastForwards', 'true'], cwd=path ) if deny_deletes: status = self.run_command( git + ['config', 'receive.denyDeletes', 'true'], cwd=path ) return status def write_hggit_hgrc(self, dest): """ Update .hg/hgrc, if not already updated """ hgrc = os.path.join(dest, '.hg', 'hgrc') contents = '' if os.path.exists(hgrc): contents = self.read_from_file(hgrc) if 'hggit=' not in contents: hgrc_update = """[extensions] hggit= [git] intree=1 """ self.write_to_file(hgrc, hgrc_update, open_mode='a') def _process_locale(self, locale, type, config, l10n_remote_targets, name, l10n_repos): """ This contains the common processing that we do on both goanna_config and gaia_config for a given locale. """ replace_dict = {'locale': locale} new_targets = deepcopy(config.get('targets', {})) for target in new_targets: dest = target['target_dest'] if '%(locale)s' in dest: new_dest = dest % replace_dict target['target_dest'] = new_dest remote_target = l10n_remote_targets.get(new_dest) if remote_target is None: # generate target if not seen before possible_remote_target = l10n_remote_targets.get(dest) # target may be remote, or local - we can tell by seeing # local targets will be none - remote targets will not, # so we can use this to test if it is local or remote if possible_remote_target is not None: remote_target = deepcopy(possible_remote_target) remote_repo = remote_target.get('repo') if '%(locale)s' in remote_repo: remote_target['repo'] = remote_repo % replace_dict l10n_remote_targets[new_dest] = remote_target long_name = '%s_%s_%s' % (type, name, locale) repo_dict = { 'repo': config['hg_url'] % replace_dict, 'revision': 'default', 'repo_name': long_name, 'conversion_dir': long_name, 'mapfile_name': '%s-mapfile' % long_name, 'targets': new_targets, 'vcs': 'hg', 'branch_config': { 'branches': { 'default': config['git_branch_name'], }, }, 'tag_config': config.get('tag_config', {}), 'mapper': config.get('mapper', {}), 'generate_git_notes': config.get('generate_git_notes', {}), } l10n_repos.append(repo_dict) def _query_l10n_repos(self): """ Since I didn't want to have to build a huge static list of l10n repos, and since it would be nicest to read the list of locales from their SSoT files. """ l10n_repos = [] l10n_remote_targets = deepcopy(self.config['remote_targets']) dirs = self.query_abs_dirs() goanna_dict = deepcopy(self.config['l10n_config'].get('goanna_config', {})) for name, goanna_config in goanna_dict.items(): file_name = self.download_file(goanna_config['locales_file_url'], parent_dir=dirs['abs_work_dir']) if not os.path.exists(file_name): self.error("Can't download locales from %s; skipping!" % goanna_config['locales_file_url']) continue contents = self.read_from_file(file_name) for locale in contents.splitlines(): self._process_locale(locale, 'goanna', goanna_config, l10n_remote_targets, name, l10n_repos) gaia_dict = deepcopy(self.config['l10n_config'].get('gaia_config', {})) for name, gaia_config in gaia_dict.items(): contents = self.retry( self.load_json_from_url, args=(gaia_config['locales_file_url'],) ) if not contents: self.error("Can't download locales from %s; skipping!" % gaia_config['locales_file_url']) continue for locale in dict(contents).keys(): self._process_locale(locale, 'gaia', gaia_config, l10n_remote_targets, name, l10n_repos) self.info("Built l10n_repos...") self.info(pprint.pformat(l10n_repos, indent=4)) self.info("Remote targets...") self.info(pprint.pformat(l10n_remote_targets, indent=4)) self.remote_targets = l10n_remote_targets return l10n_repos def _query_project_repos(self): """ Since I didn't want to have to build a huge static list of project branch repos. """ project_repos = [] for project in self.config.get("project_branches", []): repo_dict = { 'repo': self.config['project_branch_repo_url'] % {'project': project}, 'revision': 'default', 'repo_name': project, 'targets': [{ 'target_dest': 'github-project-branches', }], 'vcs': 'hg', 'branch_config': { 'branches': { 'default': project, }, }, 'tag_config': {}, } project_repos.append(repo_dict) self.info("Built project_repos...") self.info(pprint.pformat(project_repos, indent=4)) return project_repos def query_all_repos(self): """ Very simple method, but we need this concatenated list many times throughout the script. """ if self.all_repos: return self.all_repos if self.config.get('conversion_type') == 'b2g-l10n': self.all_repos = self._query_l10n_repos() elif self.config.get('initial_repo'): self.all_repos = [self.config['initial_repo']] + list(self.config.get('conversion_repos', [])) else: self.all_repos = list(self.config.get('conversion_repos', [])) if self.config.get('conversion_type') == 'project-branches': self.all_repos += self._query_project_repos() return self.all_repos def query_all_non_failed_repos(self): """ Same as query_all_repos(self) but filters out repos that failed in an earlier action - so use this for downstream actions that require earlier actions did not fail for a given repo. """ all_repos = self.query_all_repos() return [repo for repo in all_repos if repo.get('repo_name') not in self.failures] def _query_repo_previous_status(self, repo_name, repo_map=None): """ Return False if previous run was unsuccessful. Return None if no previous run information. """ if repo_map is None: repo_map = self._read_repo_update_json() return repo_map.get('repos', {}).get(repo_name, {}).get('previous_push_successful') def _update_repo_previous_status(self, repo_name, successful_flag, repo_map=None, write_update=False): """ Set the repo_name to successful_flag (False for unsuccessful, True for successful) """ if repo_map is None: repo_map = self._read_repo_update_json() repo_map.setdefault('repos', {}).setdefault(repo_name, {})['previous_push_successful'] = successful_flag if write_update: self._write_repo_update_json(repo_map) return repo_map def _update_stage_repo(self, repo_config, retry=True, clobber=False): """ Update a stage repo. See update_stage_mirror() for a description of the stage repos. """ hg = self._query_hg_exe() dirs = self.query_abs_dirs() repo_name = repo_config['repo_name'] source_dest = os.path.join(dirs['abs_source_dir'], repo_name) if clobber: self.rmtree(source_dest) if not os.path.exists(source_dest): if self.retry( self.run_command, args=(hg + ['clone', '--noupdate', repo_config['repo'], source_dest], ), kwargs={ 'output_timeout': 15 * 60, 'cwd': dirs['abs_work_dir'], 'error_list': HgErrorList, }, ): if retry: return self._update_stage_repo( repo_config, retry=False, clobber=True) else: # Don't leave a failed clone behind self.rmtree(source_dest) self._update_repo_previous_status(repo_name, successful_flag=False, write_update=True) self.add_failure( repo_name, message="Can't clone %s!" % repo_config['repo'], level=ERROR, ) elif self.config['check_incoming'] and repo_config.get("check_incoming", True): previous_status = self._query_repo_previous_status(repo_name) if previous_status is None: self.info("No previous status for %s; skipping incoming check!" % repo_name) elif previous_status is False: self.info("Previously unsuccessful status for %s; skipping incoming check!" % repo_name) else: # Run |hg incoming| and skip all subsequent actions if there # are no no changes. # If you want to bypass this behavior (e.g. to update branches/tags # on a repo without requiring a new commit), set # repo_config["incoming_check"] = False. cmd = hg + ['incoming', '-n', '-l', '1'] status = self.retry( self.run_command, args=(cmd, ), kwargs={ 'output_timeout': 5 * 60, 'cwd': source_dest, 'error_list': HgErrorList, 'success_codes': [0, 1, 256], }, ) if status in (1, 256): self.info("No changes for %s; skipping." % repo_name) # Overload self.failures to tell downstream actions to noop on # this repo self.failures.append(repo_name) return elif status != 0: self.add_failure( repo_name, message="Error getting changes for %s; skipping!" % repo_config['repo_name'], level=ERROR, ) self._update_repo_previous_status(repo_name, successful_flag=False, write_update=True) return cmd = hg + ['pull'] if self.retry( self.run_command, args=(cmd, ), kwargs={ 'output_timeout': 15 * 60, 'cwd': source_dest, 'error_list': HgErrorList, }, ): if retry: return self._update_stage_repo( repo_config, retry=False, clobber=True) else: self._update_repo_previous_status(repo_name, successful_flag=False, write_update=True) self.add_failure( repo_name, message="Can't pull %s!" % repo_config['repo'], level=ERROR, ) # commenting out hg verify since it takes ~5min per repo; hopefully # exit codes will save us # if self.run_command(hg + ["verify"], cwd=source_dest): # if retry: # return self._update_stage_repo(repo_config, retry=False, clobber=True) # else: # self.fatal("Can't verify %s!" % source_dest) def _do_push_repo(self, base_command, refs_list=None, kwargs=None): """ Helper method for _push_repo() since it has to be able to break out of the target_repo list loop, and the commands loop borks that. """ commands = [] if refs_list: while len(refs_list) > 10: commands.append(base_command + refs_list[0:10]) refs_list = refs_list[10:] commands.append(base_command + refs_list) else: commands = [base_command] if kwargs is None: kwargs = {} for command in commands: # Do the push, with retry! if self.retry( self.run_command, args=(command, ), kwargs=kwargs, ): return -1 def _push_repo(self, repo_config): """ Push a repo to a path ("test_push") or remote server. This was meant to be a cross-vcs method, but currently only covers git pushes. """ dirs = self.query_abs_dirs() conversion_dir = self.query_abs_conversion_dir(repo_config) if not conversion_dir: self.fatal("No conversion_dir for %s!" % repo_config['repo_name']) source_dir = os.path.join(dirs['abs_source_dir'], repo_config['repo_name']) git = self.query_exe('git', return_type='list') hg = self._query_hg_exe() return_status = '' for target_config in repo_config['targets']: test_push = False remote_config = {} if target_config.get("test_push"): test_push = True force_push = target_config.get("force_push") target_name = os.path.join( dirs['abs_target_dir'], target_config['target_dest']) target_vcs = target_config.get("vcs") else: target_name = target_config['target_dest'] if self.remote_targets is None: self.remote_targets = self.config.get('remote_targets', {}) remote_config = self.remote_targets.get(target_name, target_config) force_push = remote_config.get("force_push", target_config.get("force_push")) target_vcs = remote_config.get("vcs", target_config.get("vcs")) if target_vcs == "git": base_command = git + ['push'] env = {} if force_push: base_command.append("-f") if test_push: target_git_repo = target_name else: target_git_repo = remote_config['repo'] # Allow for using a custom git ssh key. env['GIT_SSH_KEY'] = remote_config['ssh_key'] env['GIT_SSH'] = os.path.join(external_tools_path, 'git-ssh-wrapper.sh') base_command.append(target_git_repo) # Allow for pushing a subset of repo branches to the target. # If we specify that subset, we can also specify different # names for those branches (e.g. b2g18 -> master for a # standalone b2g18 repo) # We query hg for these because the conversion dir will have # branches from multiple hg repos, and the regexes may match # too many things. refs_list = [] if repo_config.get('generate_git_notes', False): refs_list.append('+refs/notes/commits:refs/notes/commits') branch_map = self.query_branches( target_config.get('branch_config', repo_config.get('branch_config', {})), source_dir, ) # If the target_config has a branch_config, the key is the # local git branch and the value is the target git branch. if target_config.get("branch_config"): for (branch, target_branch) in branch_map.items(): refs_list += ['+refs/heads/%s:refs/heads/%s' % (branch, target_branch)] # Otherwise the key is the hg branch and the value is the git # branch; use the git branch for both local and target git # branch names. else: for (hg_branch, git_branch) in branch_map.items(): refs_list += ['+refs/heads/%s:refs/heads/%s' % (git_branch, git_branch)] # Allow for pushing a subset of tags to the target, via name or # regex. Again, query hg for this list because the conversion # dir will contain tags from multiple hg repos, and the regexes # may match too many things. tag_config = target_config.get('tag_config', repo_config.get('tag_config', {})) if tag_config.get('tags'): for (tag, target_tag) in tag_config['tags'].items(): refs_list += ['+refs/tags/%s:refs/tags/%s' % (tag, target_tag)] if tag_config.get('tag_regexes'): regex_list = [] for regex in tag_config['tag_regexes']: regex_list.append(re.compile(regex)) tag_list = self.get_output_from_command( hg + ['tags'], cwd=source_dir, ) if tag_list is not None: for tag_line in tag_list.splitlines(): if not tag_line: continue tag_parts = tag_line.split() if not tag_parts: self.error("Bogus tag_line? %s" % str(tag_line)) continue tag_name = tag_parts[0] for regex in regex_list: if tag_name != 'tip' and regex.search(tag_name) is not None: refs_list += ['+refs/tags/%s:refs/tags/%s' % (tag_name, tag_name)] continue error_msg = "%s: Can't push %s to %s!\n" % (repo_config['repo_name'], conversion_dir, target_git_repo) if self._do_push_repo( base_command, refs_list=refs_list, kwargs={ 'output_timeout': target_config.get("output_timeout", 30 * 60), 'cwd': os.path.join(conversion_dir, '.git'), 'error_list': GitErrorList, 'partial_env': env, } ): if target_config.get("test_push"): error_msg += "This was a test push that failed; not proceeding any further with %s!\n" % repo_config['repo_name'] self.error(error_msg) return_status += error_msg if target_config.get("test_push"): break else: # TODO write hg error_msg = "%s: Don't know how to deal with vcs %s!\n" % ( target_config['target_dest'], target_vcs) self.error(error_msg) return_status += error_msg return return_status def _query_mapped_revision(self, revision=None, mapfile=None): """ Use the virtualenv mapper module to search a mapfile for a revision. """ if not callable(self.mapfile_binary_search): site_packages_path = self.query_python_site_packages_path() sys.path.append(os.path.join(site_packages_path, 'mapper')) try: from bsearch import mapfile_binary_search global log log = self.log_obj self.mapfile_binary_search = mapfile_binary_search except ImportError, e: self.fatal("Can't import mapfile_binary_search! %s\nDid you create-virtualenv?" % str(e)) # I wish mapper did this for me, but ... fd = open(mapfile, 'rb') m = mmap.mmap(fd.fileno(), 0, mmap.MAP_PRIVATE, mmap.PROT_READ) return self.mapfile_binary_search(m, revision) def _post_fatal(self, message=None, exit_code=None): """ After we call fatal(), run this method before exiting. """ if 'notify' in self.actions: self.notify(message=message, fatal=True) self.copy_logs_to_upload_dir() def _read_repo_update_json(self): """ repo_update.json is a file we create with information about each repo we're converting: git/hg branch names, git/hg revisions, pull datetime/timestamp, and push datetime/timestamp. Since we want to be able to incrementally update portions of this file as we pull/push each branch, we need to be able to read the json into memory, so we can update the dict and re-write the json to disk. """ repo_map = {} dirs = self.query_abs_dirs() path = os.path.join(dirs['abs_upload_dir'], 'repo_update.json') if os.path.exists(path): fh = open(path, 'r') repo_map = json.load(fh) fh.close() return repo_map def query_abs_conversion_dir(self, repo_config): dirs = self.query_abs_dirs() if repo_config.get('conversion_dir'): dest = os.path.join(dirs['abs_work_dir'], 'conversion', repo_config['conversion_dir']) else: dest = dirs.get('abs_conversion_dir') return dest def _write_repo_update_json(self, repo_map): """ The write portion of _read_repo_update_json(). """ dirs = self.query_abs_dirs() contents = json.dumps(repo_map, sort_keys=True, indent=4) self.write_to_file( os.path.join(dirs['abs_upload_dir'], 'repo_update.json'), contents, create_parent_dir=True ) def _query_hg_exe(self): """Returns the hg executable command as a list """ # If "hg" is set in "exes" section of config use that. # If not, get path from self.query_virtualenv_path() method # (respects --work-dir and --venv-path and --virtualenv-path). exe_command = self.query_exe('hg', return_type="list", default=[os.path.join(self.query_virtualenv_path(), "bin", "hg")]) # possible additional command line options can be specified in "hg_options" of self.config hg_options = self.config.get("hg_options", ()) exe_command.extend(hg_options) return exe_command def query_branches(self, branch_config, repo_path, vcs='hg'): """ Given a branch_config of branches and branch_regexes, return a dict of existing branch names to target branch names. """ branch_map = {} if "branches" in branch_config: branch_map = deepcopy(branch_config['branches']) if "branch_regexes" in branch_config: regex_list = list(branch_config['branch_regexes']) full_branch_list = [] if vcs == 'hg': hg = self._query_hg_exe() # This assumes we always want closed branches as well. # If not we may need more options. output = self.get_output_from_command( hg + ['branches', '-a'], cwd=repo_path ) if output: for line in output.splitlines(): full_branch_list.append(line.split()[0]) elif vcs == 'git': git = self.query_exe("git", return_type="list") output = self.get_output_from_command( git + ['branch', '-l'], cwd=repo_path ) if output: for line in output.splitlines(): full_branch_list.append(line.replace('*', '').split()[0]) for regex in regex_list: for branch in full_branch_list: m = re.search(regex, branch) if m: # Don't overwrite branch_map[branch] if it exists branch_map.setdefault(branch, branch) return branch_map def _combine_mapfiles(self, mapfiles, combined_mapfile, cwd=None): """ Adapted from repo-sync-tools/combine_mapfiles Consolidate multiple conversion processes' mapfiles into a single mapfile. """ self.info("Determining whether we need to combine mapfiles...") if cwd is None: cwd = self.query_abs_dirs()['abs_upload_dir'] existing_mapfiles = [] for f in mapfiles: f_path = os.path.join(cwd, f) if os.path.exists(f_path): existing_mapfiles.append(f) else: self.warning("%s doesn't exist!" % f_path) combined_mapfile_path = os.path.join(cwd, combined_mapfile) if os.path.exists(combined_mapfile_path): combined_timestamp = os.path.getmtime(combined_mapfile_path) for f in existing_mapfiles: f_path = os.path.join(cwd, f) if os.path.getmtime(f_path) > combined_timestamp: # Yes, we want to combine mapfiles break else: self.info("No new mapfiles to combine.") return self.move(combined_mapfile_path, "%s.old" % combined_mapfile_path) output = self.get_output_from_command( ['sort', '--unique', '-t', ' ', '--key=2'] + existing_mapfiles, silent=True, halt_on_failure=True, cwd=cwd, ) self.write_to_file(combined_mapfile_path, output, verbose=False, error_level=FATAL) self.run_command(['ln', '-sf', combined_mapfile, '%s-latest' % combined_mapfile], cwd=cwd) # Actions {{{1 def list_repos(self): repos = self.query_all_repos() self.info(pprint.pformat(repos)) def create_test_targets(self): """ This action creates local directories to do test pushes to. """ dirs = self.query_abs_dirs() for repo_config in self.query_all_non_failed_repos(): for target_config in repo_config['targets']: if not target_config.get('test_push'): continue target_dest = os.path.join(dirs['abs_target_dir'], target_config['target_dest']) if not os.path.exists(target_dest): self.info("Creating local target repo %s." % target_dest) if target_config.get("vcs", "git") == "git": self.init_git_repo(target_dest, additional_args=['--bare', '--shared=all'], deny_deletes=True) else: self.fatal("Don't know how to deal with vcs %s!" % target_config['vcs']) else: self.debug("%s exists; skipping." % target_dest) def update_stage_mirror(self): """ The stage mirror is a buffer clean clone of repositories. The logic behind this is that we get occasional corruption from |hg pull|. It's much less time-consuming to detect this in a clean clone, and reclone, than to detect this in a working conversion directory, and try to repair or reclone+reconvert. We pull the stage mirror into the work mirror, where the conversion is done. """ for repo_config in self.query_all_non_failed_repos(): self._update_stage_repo(repo_config) def pull_out_new_sha_lookups(self, old_file, new_file): """ This method will return an iterator which iterates through lines in file new_file that do not exist in old_file. If old_file can't be read, all lines in new_file are returned. It does not cause any problems if lines exist in old_file that do not exist in new_file. Results are sorted by the second field (text after first space in line). This is somewhat equivalent to: ( [ ! -f "${old_file}" ] && cat "${new_file}" || diff "${old_file}" "${new_file}" | sed -n 's/> //p' ) | sort -k2""" with self.opened(old_file) as (old, err): if err: self.info('Map file %s not found - probably first time this has run.' % old_file) old_set = frozenset() else: old_set = frozenset(old) with self.opened(new_file, 'rt') as (new, err): if err: self.error('Could not read contents of map file %s:\n%s' % (new_file, err.message)) new_set = frozenset() else: new_set = frozenset(new) for line in sorted(new_set.difference(old_set), key=lambda line: line.partition(' ')[2]): yield line def update_work_mirror(self): """ Pull the latest changes into the work mirror, update the repo_map json, and run |hg gexport| to convert those latest changes into the git conversion repo. """ hg = self._query_hg_exe() git = self.query_exe("git", return_type="list") dirs = self.query_abs_dirs() repo_map = self._read_repo_update_json() timestamp = int(time.time()) datetime = time.strftime('%Y-%m-%d %H:%M %Z') repo_map['last_pull_timestamp'] = timestamp repo_map['last_pull_datetime'] = datetime for repo_config in self.query_all_non_failed_repos(): repo_name = repo_config['repo_name'] source = os.path.join(dirs['abs_source_dir'], repo_name) dest = self.query_abs_conversion_dir(repo_config) if not dest: self.fatal("No conversion_dir for %s!" % repo_name) if not os.path.exists(dest): self.mkdir_p(os.path.dirname(dest)) self.run_command(hg + ['clone', '--noupdate', source, dest], error_list=HgErrorList, halt_on_failure=False) if os.path.exists(dest): self.write_hggit_hgrc(dest) self.init_git_repo('%s/.git' % dest, additional_args=['--bare']) self.run_command( git + ['--git-dir', '%s/.git' % dest, 'config', 'gc.auto', '0'], ) else: self.add_failure( repo_name, message="Failed to clone %s!" % source, level=ERROR, ) continue # Build branch map. branch_map = self.query_branches( repo_config.get('branch_config', {}), source, ) for (branch, target_branch) in branch_map.items(): output = self.get_output_from_command( hg + ['id', '-r', branch], cwd=source ) if output: rev = output.split(' ')[0] else: self.add_failure( repo_name, message="Branch %s doesn't exist in %s (%s cloned into staging directory %s)!" % (branch, repo_name, repo_config.get('repo'), source), level=ERROR, ) continue timestamp = int(time.time()) datetime = time.strftime('%Y-%m-%d %H:%M %Z') if self.run_command(hg + ['pull', '-r', rev, source], cwd=dest, error_list=HgErrorList): # We shouldn't have an issue pulling! self.add_failure( repo_name, message="Unable to pull %s from stage_source; clobbering and skipping!" % repo_name, level=ERROR, ) self._update_repo_previous_status(repo_name, successful_flag=False, write_update=True) # don't leave a dirty checkout behind, and skip remaining branches self.rmtree(source) break self.run_command( hg + ['bookmark', '-f', '-r', rev, target_branch], cwd=dest, error_list=HgErrorList, ) # This might get a little large. repo_map.setdefault('repos', {}).setdefault(repo_name, {}).setdefault('branches', {})[branch] = { 'hg_branch': branch, 'hg_revision': rev, 'git_branch': target_branch, 'pull_timestamp': timestamp, 'pull_datetime': datetime, } if self.query_failure(repo_name): # We hit an error in the for loop above continue self.retry( self.run_command, args=(hg + ['-v', 'gexport'], ), kwargs={ 'output_timeout': 15 * 60, 'cwd': dest, 'error_list': HgErrorList, }, error_level=FATAL, ) generated_mapfile = os.path.join(dest, '.hg', 'git-mapfile') self.copy_to_upload_dir( generated_mapfile, dest=repo_config.get('mapfile_name', self.config.get('mapfile_name', "goanna-mapfile")), log_level=INFO ) for (branch, target_branch) in branch_map.items(): git_revision = self._query_mapped_revision( revision=rev, mapfile=generated_mapfile) repo_map['repos'][repo_name]['branches'][branch]['git_revision'] = git_revision self._write_repo_update_json(repo_map) def create_git_notes(self): git = self.query_exe("git", return_type="list") for repo_config in self.query_all_non_failed_repos(): repo = repo_config['repo'] if repo_config.get('generate_git_notes', False): dest = self.query_abs_conversion_dir(repo_config) # 'git-mapfile' is created by hggit plugin, containing all the mappings complete_mapfile = os.path.join(dest, '.hg', 'git-mapfile') # 'added-to-git-notes' is the set of mappings known to be recorded in the git notes # of the project (typically 'git-mapfile' from previous run) added_to_git_notes = os.path.join(dest, '.hg', 'added-to-git-notes') # 'delta-git-notes' is the set of new mappings found on this iteration, that # now need to be added to the git notes of the project (the diff between the # previous two files described) delta_git_notes = os.path.join(dest, '.hg', 'delta-git-notes') git_dir = os.path.join(dest, '.git') self.rmtree(delta_git_notes) git_notes_adding_successful = True with self.opened(delta_git_notes, open_mode='w') as (delta_out, err): if err: git_notes_adding_successful = False self.warn("Could not write list of unprocessed git note mappings to file %s - not critical" % delta_git_notes) else: for sha_lookup in self.pull_out_new_sha_lookups(added_to_git_notes, complete_mapfile): print >>delta_out, sha_lookup, (git_sha, hg_sha) = sha_lookup.split() # only add git note if not already there - note # devs may have added their own notes, so don't # replace any existing notes, just add to them output = self.get_output_from_command( git + ['notes', 'show', git_sha], cwd=git_dir, ignore_errors=True ) git_note_text='Upstream source: %s/rev/%s' % (repo, hg_sha) git_notes_add_return_code = 1 if not output or output.find(git_note_text) < 0: git_notes_add_return_code = self.run_command( git + ['notes', 'append', '-m', git_note_text, git_sha], cwd=git_dir ) # if note was successfully added, or it was already there, we can # mark it as added, by putting it in the delta file... if git_notes_add_return_code == 0 or output.find(git_note_text) >= 0: print >>delta_out, sha_lookup, else: self.error("Was not able to append required git note for git commit %s ('%s')" % (git_sha, git_note_text)) git_notes_adding_successful = False if git_notes_adding_successful: self.copyfile(complete_mapfile, added_to_git_notes) else: self.info("Not creating git notes for repo %s (generate_git_notes not set to True)" % repo) def publish_to_mapper(self): """ This method will attempt to create git notes for any new git<->hg mappings found in the generated_mapfile file and also push new mappings to mapper service.""" for repo_config in self.query_all_non_failed_repos(): dest = self.query_abs_conversion_dir(repo_config) # 'git-mapfile' is created by hggit plugin, containing all the mappings complete_mapfile = os.path.join(dest, '.hg', 'git-mapfile') # 'published-to-mapper' is all the mappings that are known to be published # to mapper, for this project (typically the 'git-mapfile' from the previous # run) published_to_mapper = os.path.join(dest, '.hg', 'published-to-mapper') # 'delta-for-mapper' is the set of mappings that need to be published to # mapper on this iteration, i.e. the diff between the previous two files # described delta_for_mapper = os.path.join(dest, '.hg', 'delta-for-mapper') self.rmtree(delta_for_mapper) # we only replace published_to_mapper if we successfully updated # pushed to mapper mapper_config = repo_config.get('mapper', {}) if mapper_config: site_packages_path = self.query_python_site_packages_path() if site_packages_path not in sys.path: sys.path.append(site_packages_path) try: import requests except ImportError as e: self.error("Can't import requests: %s\nDid you create-virtualenv?" % str(e)) mapper_url = mapper_config['url'] mapper_project = mapper_config['project'] insert_url = "%s/%s/insert/ignoredups" % (mapper_url, mapper_project) headers = { 'Content-Type': 'text/plain', 'Authentication': 'Bearer %s' % os.environ["RELENGAPI_INSERT_HGGIT_MAPPINGS_AUTH_TOKEN"] } all_new_mappings = [] all_new_mappings.extend(self.pull_out_new_sha_lookups(published_to_mapper, complete_mapfile)) self.write_to_file(delta_for_mapper, "".join(all_new_mappings)) # due to timeouts on load balancer, we only push 200 lines at a time # this means that we should get http response back within 30 seconds # including the time it takes to insert the mappings in the database publish_successful = True for i in range(0, len(all_new_mappings), 200): r = requests.post(insert_url, data="".join(all_new_mappings[i:i+200]), headers=headers) if (r.status_code != 200): self.error("Could not publish mapfile ('%s') line range [%s, %s] to mapper (%s) - received http %s code" % (delta_for_mapper, i, i+200, insert_url, r.status_code)) publish_successful = False # we won't break out, since we may be able to publish other mappings # and duplicates are allowed, so we will push the whole lot again next # time anyway else: self.info("Published mapfile ('%s') line range [%s, %s] to mapper (%s)" % (delta_for_mapper, i, i+200, insert_url)) if publish_successful: # if we get this far, we know we could successfully post to mapper, so now # we can copy the mapfile over "previously generated" version # so that we don't push to mapper for these commits again self.copyfile(complete_mapfile, published_to_mapper) else: self.copyfile(complete_mapfile, published_to_mapper) def combine_mapfiles(self): """ This method is for any job (l10n, project-branches) that needs to combine mapfiles. """ if not self.config.get("combined_mapfile"): self.info("No combined_mapfile set in config; skipping!") return dirs = self.query_abs_dirs() mapfiles = [] if self.config.get('conversion_type') == 'b2g-l10n': for repo_config in self.query_all_non_failed_repos(): if repo_config.get("mapfile_name"): mapfiles.append(repo_config['mapfile_name']) else: mapfiles.append(self.config.get('mapfile_name', 'goanna-mapfile')) if self.config.get('external_mapfile_urls'): for url in self.config['external_mapfile_urls']: file_name = self.download_file( url, parent_dir=dirs['abs_upload_dir'], error_level=FATAL, ) mapfiles.append(file_name) if not mapfiles: self.info("No mapfiles to combine; skipping!") return self._combine_mapfiles(mapfiles, self.config['combined_mapfile']) def push(self): """ Push to all targets. test_targets are local directory test repos; the rest are remote. Updates the repo_map json. """ self.create_test_targets() repo_map = self._read_repo_update_json() failure_msg = "" timestamp = int(time.time()) datetime = time.strftime('%Y-%m-%d %H:%M %Z') repo_map['last_push_timestamp'] = timestamp repo_map['last_push_datetime'] = datetime for repo_config in self.query_all_non_failed_repos(): timestamp = int(time.time()) datetime = time.strftime('%Y-%m-%d %H:%M %Z') status = self._push_repo(repo_config) repo_name = repo_config['repo_name'] if not status: # good if repo_name not in self.successful_repos: self.successful_repos.append(repo_name) repo_map.setdefault('repos', {}).setdefault(repo_name, {})['push_timestamp'] = timestamp repo_map['repos'][repo_name]['push_datetime'] = datetime previous_status = self._query_repo_previous_status(repo_name, repo_map=repo_map) if previous_status is None: self.add_summary("Possibly the first successful push of %s." % repo_name) elif previous_status is False: self.add_summary("Previously unsuccessful push of %s is now successful!" % repo_name) self._update_repo_previous_status(repo_name, successful_flag=True, repo_map=repo_map, write_update=True) else: self.add_failure( repo_name, message="Unable to push %s." % repo_name, level=ERROR, ) failure_msg += status + "\n" self._update_repo_previous_status(repo_name, successful_flag=False, repo_map=repo_map, write_update=True) if not failure_msg: repo_map['last_successful_push_timestamp'] = repo_map['last_push_timestamp'] repo_map['last_successful_push_datetime'] = repo_map['last_push_datetime'] self._write_repo_update_json(repo_map) if failure_msg: self.fatal("Unable to push these repos:\n%s" % failure_msg) def preflight_upload(self): if not self.config.get("copy_logs_post_run", True): self.copy_logs_to_upload_dir() def upload(self): """ Upload the upload_dir according to the upload_config. """ failure_msg = '' dirs = self.query_abs_dirs() for upload_config in self.config.get('upload_config', []): if self.retry( self.rsync_upload_directory, args=( dirs['abs_upload_dir'], ), kwargs=upload_config, ): failure_msg += '%s:%s' % (upload_config['remote_host'], upload_config['remote_path']) if failure_msg: self.fatal("Unable to upload to this location:\n%s" % failure_msg) # __main__ {{{1 if __name__ == '__main__': conversion = HgGitScript() conversion.run()