Logo Search packages:      
Sourcecode: bazaar version File versions

inv-ids.c

/* inv-ids.c:
 *
 ****************************************************************
 * Copyright (C) 2002, 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/bugs/panic.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/os/errno-to-string.h"
#include "hackerlab/os/time.h"
#include "hackerlab/os/sys/types.h"
#include "hackerlab/os/unistd.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/fmt/cvt.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "hackerlab/arrays/ar.h"
#include "libfsutils/ensure-dir.h"
#include "libarch/my.h"
#include "libarch/project-tree.h"
#include "libarch/patch-logs.h"
#include "libarch/invent.h"
#include "libarch/inode-sig.h"
#include "libarch/inv-ids.h"
#include "libarch/pfs.h"
#include "hackerlab/fs/cwd.h"
#include "libawk/relational.h"
#include "libarch/changelogs.h"



/* File-local selector for how file_id computes an inventory id.
 * arch_inventory_id translates each public arch_id_tagging_method
 * value into one of these before calling file_id.
 */
enum ftag_method
{
  ftag_names,                   /* chosen for arch_names_id_tagging */
  ftag_implicit,                /* chosen for arch_implicit_id_tagging */
  ftag_tagline,                 /* chosen for arch_tagline_id_tagging */
  ftag_explicit                 /* chosen for arch_explicit_id_tagging */
};


/* __STDC__ prototypes for static functions */
static t_uchar * file_id (int * errn,
                          struct alloc_limits * limits,
                          enum ftag_method method,
                          int untagged_is_source,
                          t_uchar * path,
                          assoc_table id_tagging_shortcut,
                          struct stat * known_lstat,
                          assoc_table * explicit_skips);
static int is_at_or_underneath_archdir (char * rel_file);
static int filename_matches (regex_t * pattern, char * filename);
static t_uchar * explicit_id (int * errn,
                              assoc_table * skips,
                              struct alloc_limits * limits,
                              t_uchar * arg_file,
                              t_uchar * id_file,
                              t_uchar * prefix,
                              t_uchar * postfix);
static t_uchar * implicit_id (int * errn,
                              struct alloc_limits * limits,
                              t_uchar * file,
                              t_uchar * basename,
                              t_uchar * prefix,
                              struct stat * statb,
                              assoc_table id_tagging_shortcut);
static long smash_non_graphical (t_uchar * buf, long amt);



/* Compute the inventory id used for the patch-log file of `revision'
 * in `archive': the string "A_" followed by the path that
 * arch_log_file reports for that log relative to ".".
 *
 * The returned string is newly allocated.
 */
t_uchar *
arch_log_file_id (t_uchar * archive, t_uchar * revision)
{
  t_uchar * const log_path = arch_log_file (".", archive, revision);
  t_uchar * const id = str_alloc_cat (0, "A_", log_path);

  lim_free (0, log_path);
  return id;
}


/* Build an association table mapping inventory id -> file name for
 * every record of `*file_list' (the name is field 0 of each record).
 *
 * The id tagging method is taken from the naming conventions of the
 * tree rooted at `tree_root'.
 */
assoc_table
arch_filenames_ids (rel_table * file_list, t_uchar * tree_root)
{
  struct arch_inventory_options options = {0, };
  assoc_table ids_to_names = 0;
  int n_files = rel_n_records (* file_list);
  int row = 0;

  options.categories = arch_inventory_source;
  options.want_ids = 1;
  options.include_excluded = 1;
  arch_get_inventory_naming_conventions (&options, tree_root);

  while (row != n_files)
    {
      t_uchar * name = (* file_list) [row] [0];
      t_uchar * id = arch_inventory_id (options.method, 0, name, 0, 0, 0);

      assoc_set (&ids_to_names, id, name);
      lim_free (0, id);
      ++row;
    }

  arch_free_inventory_naming_conventions (&options);
  return ids_to_names;
}


/* Return the inventory id of `path' as computed by file_id.
 *
 * If `method' is arch_unspecified_id_tagging, the method is looked up
 * from the project tree containing `path'; when no tree root is found
 * the names method is used and untagged files count as source.  In
 * that case the incoming `untagged_is_source' is overridden.
 *
 * `id_tagging_shortcut', `known_lstat' and `explicit_skips' are
 * passed through to file_id unchanged.
 *
 * If file_id returns 0 with a non-zero error code, a message is
 * printed on descriptor 2 and the program panics.  An unrecognized
 * `method' also panics.
 */
t_uchar *
arch_inventory_id (enum arch_id_tagging_method method,
                    int untagged_is_source,
                    t_uchar * path,
                    assoc_table id_tagging_shortcut,
                    struct stat * known_lstat,
                    assoc_table * explicit_skips)
{
  enum ftag_method m;
  t_uchar * answer;
  int errn = 0;

  if (method == arch_unspecified_id_tagging)
    {
      t_uchar * dir = file_name_directory_file (0, path);
      t_uchar * root = arch_tree_root (0, dir, 0);

      if (root)
        {
          enum arch_inventory_category untagged_category;

          method = arch_tree_id_tagging_method (&untagged_category, root, 0);
          untagged_is_source = (untagged_category == arch_inventory_source);
        }
      else
        {
          /* Not inside a project tree: fall back to name-based ids. */
          method = arch_names_id_tagging;
          untagged_is_source = 1;
        }

      lim_free (0, dir);
      lim_free (0, root);
    }

  /* Translate the public method into the file-local selector. */
  if (method == arch_names_id_tagging)
    m = ftag_names;
  else if (method == arch_explicit_id_tagging)
    m = ftag_explicit;
  else if (method == arch_implicit_id_tagging)
    m = ftag_implicit;
  else if (method == arch_tagline_id_tagging)
    m = ftag_tagline;
  else
    panic ("unrecognized method in arch_inventory_id");

  answer = file_id (&errn, 0, m, untagged_is_source, path, id_tagging_shortcut, known_lstat, explicit_skips);

  if (errn && !answer)
    {
      safe_printfmt (2, "error finding file id (%d: %s)\n path: %s\n", errn, errno_to_string(errn), path);
      panic ("arch_inventory_id");
    }

  return answer;
}

/* Return a newly allocated copy of the keyword naming id tagging
 * method `m' ("names", "implicit", "tagline" or "explicit").
 *
 * Panics for any other value of `m'.
 */
t_uchar *
arch_id_tagging_method_name (enum arch_id_tagging_method m)
{
  char * keyword;

  switch (m)
    {
    case arch_names_id_tagging:
      keyword = "names";
      break;

    case arch_implicit_id_tagging:
      keyword = "implicit";
      break;

    case arch_tagline_id_tagging:
      keyword = "tagline";
      break;

    case arch_explicit_id_tagging:
      keyword = "explicit";
      break;

    default:
      panic ("unknown id tagging method (arch_id_tagging_method_name)");
      return 0;                 /* not reached */
    }

  return str_save (0, keyword);
}


/* Map a method keyword (compared case-insensitively) to its
 * arch_id_tagging_method value.
 *
 * An unknown `name' is reported on descriptor 2 and the process
 * exits with status 2.
 */
enum arch_id_tagging_method
arch_id_tagging_method_from_name (t_uchar * name)
{
  if (!str_casecmp (name, "explicit"))
    return arch_explicit_id_tagging;

  if (!str_casecmp (name, "implicit"))
    return arch_implicit_id_tagging;

  if (!str_casecmp (name, "tagline"))
    return arch_tagline_id_tagging;

  if (!str_casecmp (name, "names"))
    return arch_names_id_tagging;

  safe_printfmt (2, "no such id tagging method (%s)\n", name);
  exit (2);
  return arch_names_id_tagging; /* notreached */
}


/* Return a newly allocated string holding a commented template of
 * id-tagging / naming-convention directives: explanatory comment
 * text, a method directive, an `untagged-source' directive, and one
 * `exclude', `junk', `precious', `backup', `unrecognized' and
 * `source' directive each, filled in from
 * arch_default_naming_conventions_regexp.
 *
 * Presumably this is the initial text of a tree's "=tagging-method"
 * file -- confirm against callers.
 *
 * NOTE(review): `method_name' is computed from `method' but is never
 * placed in the output; the method directive emitted below is the
 * hard-coded string "explicit".  As written, `method' therefore has
 * no effect on the returned text.  Confirm whether that is intended.
 */
t_uchar *
arch_default_id_tagging_method_contents (enum arch_id_tagging_method method)
{
  t_uchar * method_name = 0;
  t_uchar * excludes_regexp = 0;
  t_uchar * junk_regexp = 0;
  t_uchar * backup_regexp = 0;
  t_uchar * precious_regexp = 0;
  t_uchar * unrecognized_regexp = 0;
  t_uchar * source_regexp = 0;
  t_uchar * answer = 0;


  /* An unspecified method is treated as tagline for naming purposes. */
  if (method == arch_unspecified_id_tagging)
    method = arch_tagline_id_tagging;

  method_name = arch_id_tagging_method_name (method);
  excludes_regexp = arch_default_naming_conventions_regexp (arch_inventory_excludes);
  backup_regexp = arch_default_naming_conventions_regexp (arch_inventory_backup);
  junk_regexp = arch_default_naming_conventions_regexp (arch_inventory_junk);
  precious_regexp = arch_default_naming_conventions_regexp (arch_inventory_precious);
  unrecognized_regexp = arch_default_naming_conventions_regexp (arch_inventory_unrecognized);
  source_regexp = arch_default_naming_conventions_regexp (arch_inventory_source);


  /* The argument list is terminated by str_end; parenthesized groups
   * are single strings formed by literal concatenation.
   */
  answer = str_alloc_cat_many (0,
                               ("# id tagging method\n"
                                "#\n"
                                "# This determines how \"inventory ids\", strings conveying\n"
                                "# logical file identity, are computed for each file, directory\n"
                                "# and symbolic link.\n"
                                "#\n"
                                "# The choices are:\n"
                                "#\n"
                                "# tagline: inventory ids may be set using add-id, or omitted\n"
                                "#          (though tree-lint warns about omitted ids), or in\n"
                                "#          text files, set in a comment line near the top or\n"
                                "#          bottom of the file of a form like \"<PUNCT> arch-tag: <STRING>\".\n"
                                "#          Renames of files with no id are treated as a combined\n"
                                "#          add and delete (e.g., local changes can be lost).\n"
                                "#\n"
                                "# explicit: ids must be set using add-id.  Files passing the naming\n"
                                "#          conventions for source, but lacking add-id ids, are treated\n"
                                "#          as unrecognized files (see below).\n"
                                "#\n"
                                "# names: ids are not used.  All renames are treated as add+delete\n"
                                "#\n"
                                "# implicit: similar to tagline, but in addition, the id comment\n"
                                "#          may be of the form \"<PUNCT> <BASENAME> - <STRING>\", where\n"
                                "#          <BASENAME> is the basename of the file.   This method\n"
                                "#          is not recommended, but is retained for backwards\n"
                                "#          compatibility.\n"
                                "#\n"
                                "\n"),
                               /* The method directive: fixed text, not `method_name'. */
                               ("explicit\n"
                                "\n"),
                               ("# disposition of untagged source files\n"
                                "#\n"
                                "# (NOTE: this option must follow the tagline/explicit/names/implicit\n"
                                "# directive.)\n"
                                "#\n"
                                "# By default, the explicit method treats untagged files matching the naming\n"
                                "# conventions for source files as unrecognized and the implicit and tagline\n"
                                "# methods treat such untagged files as source.\n"
                                "#\n"
                                "# You can override those default treatments of untagged files by specifying\n"
                                "# which inventory category (see below) should be used for files whose names\n"
                                "# suggest they are source but which lack ids.\n"
                                "#\n"
                                "# This feature may be especially convenient when importing sources that do\n"
                                "# not use file naming conventions that can be conveniently described with\n"
                                "# the regexps below.\n"
                                "#\n"
                                "# Uncomment one of these lines as appropriate to override the default:\n"
                                "#\n"
                                "# untagged-source source\n"
                                "untagged-source precious\n"
                                "# untagged-source backup\n"
                                "# untagged-source junk\n"
                                "# untagged-source unrecognized\n"
                                "#\n"
                                "\n"),
                               ("# naming convention regexps\n"
                                "#\n"
                                "# For various commands, arch traverses your project trees, categorizing\n"
                                "# the files found there.  For example, when importing a project for\n"
                                "# the first time, this traversal determines which files are included\n"
                                "# in the import.\n"
                                "#\n"
                                "# The categories of greatest importance are defined in terms of three\n"
                                "# questions:\n"
                                "#\n"
                                "# 1) If arch makes a local copy of this tree, should this file be included\n"
                                "#    in the copy?\n"
                                "#\n"
                                "# 2) Is it generally safe to remove this file based only on how it is named?\n"
                                "#    For example, can it be safely clobbered by a new file of the same name?\n"
                                "#\n"
                                "# 3) Should this file be archived along with the project?  For example,\n"
                                "#    should it be included when importing the project for the first time?\n"
                                "#\n"
                                "# The primary categories are:\n"
                                "#\n"
                                "# category:      copy locally?       safe to clobber?      archive?\n"
                                "#\n"
                                "# junk           no                  yes                   no\n"
                                "# backup         no                  no                    no\n"
                                "# precious       yes                 no                    no\n"
                                "# source         yes                 no                    yes\n"
                                "#\n"
                                "# There are two additional categories, unrelated to those questions:\n"
                                "#\n"
                                "# excluded -- during a traversal by inventory, this file (and,\n"
                                "#             if a directory, its contents) are simply ignored unless the\n"
                                "#             --all flag is specified.   This category is usually used to\n"
                                "#             omit arch's own control files from a listing.\n"
                                "#\n"
                                "# unrecognized -- a category for files whose name fits no other pattern.\n"
                                "#             Usually, the presence of unrecognized files is treated as an\n"
                                "#             error.   You can use the naming conventions to define certain\n"
                                "#             names as \"deliberately unrecognized\" -- i.e., filenames whose\n"
                                "#             presence in a source tree you _want_ to be treated as an error\n"
                                "#\n"
                                "# The traveral algorithm is described here, along with lines you can edit to\n"
                                "# customize the naming conventions.\n"
                                "#\n"
                                "# Starting at \".\" within a project tree (usually at the root of the\n"
                                "# project tree) consider each filename in that directory.\n"
                                "#\n"
                                "# The files \".\" and \"..\" are simply ignored.\n"
                                "#\n"
                                "# Files containing \"illegal characters\" are characterized as unrecognized.\n"
                                "# If they are directories, traversal does _not_ descend into those directories.\n"
                                "# Currently, the illegal characters are *, ?, [, ], \\, space, and tab.\n"
                                "# (The set of illegal characters may shrink in future releases.)\n"
                                "#\n"
                                "# In an interactive call to inventory _without_ the --all flag,\n"
                                "# names are next compared to the exclude regexp defined here.  Those that\n"
                                "# are ignored and not descended below.  (Most arch operations performing\n"
                                "# traversals internally, e.g. import, do not use this pattern\n"
                                "# and skip this step of the algorithm.\n"
                                "#\n"),
                               "\n",
                               "exclude ", excludes_regexp, "\n",
                               "\n",
                               ("# If the file has a name that begins with \"++\", it is categorized as\n"
                                "# _precious_.  Names of this form are hard-wired and reserved for use by arch\n"
                                "# itself.  Traversal does not descend into precious directories, but when a\n"
                                "# precious directory is copied, its contents are recursively copied.\n"
                                "#\n"
                                "# Files and directories that reach this stage and which arch recognizes as its\n"
                                "# own control files are classified at this step as source.   Traversal _does_\n"
                                "# descend into source directories.\n"
                                "#\n"
                                "# If the file has a name that begins with \",,\", it is categorized as _junk_.\n"
                                "# Names of this form are hard-wired and reserved for use by arch and other tools,\n"
                                "# and arch may clobber such files without warning.  In a project tree, when no \n"
                                "# arch commands are running, it is safe for users to delete any \",,\" files. \n"
                                "# Although the general rule for junk files is that arch is free to clobber them,\n"
                                "# in fact, arch will only ever clobber files starting with \",,\".\n"
                                "#\n"
                                "# Traversal does not descend into junk directories.\n"
                                "#\n"
                                "# For your convenience, at this step of the traversal, you can classify\n"
                                "# additional files as junk or precious:\n"
                                "#\n"),
                               "\n",
                               "junk ", junk_regexp, "\n",
                               "\n",
                               "precious ", precious_regexp, "\n",
                               "\n",
                               ("# Files matching the following regexp are classified as backup files, and\n"
                                "# traversal does not descend into backup directories:\n"
                                "#\n"),
                               "\n",
                               "backup ", backup_regexp, "\n",
                               "\n",
                               ("# If you want to force certain filenames to be treated as errors when present,\n"
                                "# you can add them to the regexp for deliberately unrecognized files.  Traversal\n"
                                "# does not descend into unrecognized directories.\n"),
                               "\n",
                               "unrecognized ", unrecognized_regexp, "\n",
                               "\n",
                               ("# Files which match the following pattern are treated as source files.\n"
                                "# Traversal _does_ descend into source directories:\n"),
                               "\n",
                               "source ", source_regexp, "\n",
                               "\n",
                               ("# Any files not classified by the above rules are classified as unrecognized.\n"
                                "# Traversal does not descend into unrecognized directories.\n"
                                "\n"),
                               str_end);


  /* Release the intermediate strings; only `answer' is handed back. */
  lim_free (0, method_name);
  lim_free (0, excludes_regexp);
  lim_free (0, junk_regexp);
  lim_free (0, backup_regexp);
  lim_free (0, precious_regexp);
  lim_free (0, unrecognized_regexp);
  lim_free (0, source_regexp);

  return answer;
}



/* Return the newly allocated path of the "=tagging-method" file
 * inside the control directory (per arch_tree_ctl_dir) of the tree
 * rooted at `tree_root'.
 */
t_uchar *
arch_tree_id_tagging_method_file (t_uchar * tree_root)
{
  t_uchar * const control_dir = arch_tree_ctl_dir (tree_root);
  t_uchar * const method_path = file_name_in_vicinity (0, control_dir, "=tagging-method");

  lim_free (0, control_dir);
  return method_path;
}


/* Return the id tagging method recorded in the naming conventions of
 * the tree rooted at `tree_root'.
 *
 * If `cat_var' is not 0, the tree's untagged-source category is
 * stored in `*cat_var'.  `strict' is not used by this function.
 */
enum arch_id_tagging_method
arch_tree_id_tagging_method (enum arch_inventory_category * cat_var, t_uchar * tree_root, int strict)
{
  struct arch_inventory_options conventions;
  enum arch_id_tagging_method method;

  mem_set0 ((t_uchar *)&conventions, sizeof (conventions));
  arch_get_inventory_naming_conventions (&conventions, tree_root);

  method = conventions.method;
  if (cat_var)
    *cat_var = conventions.untagged_source_category;

  arch_free_inventory_naming_conventions (&conventions);
  return method;
}


/* Record `method' as the id tagging method of the tree rooted at
 * `tree_root'.
 *
 * The new contents are written to a temporary file named
 * ",,tagging-method" next to the tree's method file, which is then
 * renamed over the method file.
 *
 * If no method file exists yet, the new file holds just the method
 * name.  Otherwise the old file is copied line by line: any line
 * consisting only of one of the method keywords (case-insensitive,
 * optionally surrounded by whitespace) is replaced by the new method
 * name, and every other line is copied, gaining a newline if it
 * lacked one.  If no line was replaced, the method name is appended.
 */
void
arch_set_tree_id_tagging_method (t_uchar * tree_root,
                              enum arch_id_tagging_method method)
{
  static const struct
  {
    char * word;
    size_t len;
  } directives[] =
    {
      { "names", sizeof ("names") - 1 },
      { "explicit", sizeof ("explicit") - 1 },
      { "implicit", sizeof ("implicit") - 1 },
      { "tagline", sizeof ("tagline") - 1 }
    };
  const int n_directives = (int)(sizeof (directives) / sizeof (directives[0]));

  int errn;
  int out_fd;
  t_uchar * method_name = arch_id_tagging_method_name (method);
  t_uchar * method_file = arch_tree_id_tagging_method_file (tree_root);
  t_uchar * method_dir = file_name_directory_file (0, method_file);
  t_uchar * method_tmp = file_name_in_vicinity (0, method_dir, ",,tagging-method");

  /* Discard any stale temporary; a failure here is ignored and the
   * exclusive create below then decides.
   */
  vu_unlink (&errn, method_tmp);
  out_fd = safe_open (method_tmp, O_WRONLY | O_CREAT | O_EXCL, 0666);

  if (safe_access (method_file, F_OK))
    {
      safe_printfmt (out_fd, "%s\n", method_name);
    }
  else
    {
      int in_fd = safe_open (method_file, O_RDONLY, 0);
      int emitted_method = 0;
      t_uchar * line;
      long len;

      for (;;)
        {
          t_uchar * cursor;
          t_uchar * end;
          int is_method_line = 0;
          int k;

          safe_next_line (&line, &len, in_fd);
          if (!line)
            break;

          end = line + len;

          /* Skip leading blanks. */
          for (cursor = line; (cursor < end) && char_is_blank (*cursor); ++cursor)
            ;

          for (k = 0; k < n_directives; ++k)
            {
              if (((end - cursor) >= directives[k].len)
                  && !str_casecmp_n (directives[k].word, directives[k].len, cursor, directives[k].len))
                {
                  /* A keyword only counts when nothing but whitespace follows it. */
                  t_uchar * rest = cursor + directives[k].len;

                  while ((rest < end) && char_is_space (*rest))
                    ++rest;
                  is_method_line = (rest == end);
                  break;
                }
            }

          if (is_method_line)
            {
              safe_printfmt (out_fd, "%s\n", method_name);
              emitted_method = 1;
              continue;
            }

          safe_printfmt (out_fd, "%.*s", (int)len, line);
          if (len && (line[len - 1] != '\n'))
            safe_printfmt (out_fd, "\n");
        }

      if (!emitted_method)
        safe_printfmt (out_fd, "%s\n", method_name);

      safe_close (in_fd);
    }

  safe_close (out_fd);
  safe_rename (method_tmp, method_file);

  lim_free (0, method_name);
  lim_free (0, method_file);
  lim_free (0, method_dir);
  lim_free (0, method_tmp);
}


/* Return the newly allocated path of the explicit id file for `path'.
 *
 * For a directory the id file is "<path>/.arch-ids/=id".  For
 * anything else -- including a path that does not exist -- it is
 * ".arch-ids/<basename>.id" in the containing directory.
 *
 * An lstat failure other than ENOENT is reported on descriptor 2 and
 * the process exits with status 2.
 */
t_uchar *
arch_explicit_id_file_for (t_uchar * path)
{
  struct stat st;
  int errn;
  int path_is_dir = 0;
  t_uchar * container;
  t_uchar * ids_dir;
  t_uchar * id_name;
  t_uchar * result;

  if (!vu_lstat (&errn, path, &st))
    {
      path_is_dir = S_ISDIR (st.st_mode) ? 1 : 0;
    }
  else if (errn != ENOENT)
    {
      safe_printfmt (2, "i/o error (%d: %s) for vu_lstat of  %s\n", errn, errno_to_string (errn), path);
      exit (2);
    }

  if (path_is_dir)
    {
      container = str_save (0, path);
      id_name = str_save (0, "=id");
    }
  else
    {
      container = file_name_directory_file (0, path);
      id_name = str_realloc_cat (0, file_name_tail (0, path), ".id");
    }

  ids_dir = file_name_in_vicinity (0, container, ".arch-ids");
  result = file_name_in_vicinity (0, ids_dir, id_name);

  lim_free (0, container);
  lim_free (0, ids_dir);
  lim_free (0, id_name);

  return result;
}


/* Return a newly allocated, freshly generated id string of the form
 *
 *     <my-id> <ctime text without newline> <pid>.<sequence>
 *
 * where <my-id> comes from arch_my_id and <sequence> is a counter
 * that starts at 0 and grows by one on every call in this process.
 *
 * Panics if the current time can not be obtained.
 */
t_uchar *
arch_generate_id (void)
{
  static unsigned long seq = 0;

  time_t now;
  char * stamp;
  t_uchar * newline;
  t_uchar * who;
  t_uchar pid_digits[128];
  t_uchar seq_digits[128];
  t_uchar * id;

  if (time (&now) < 0)
    panic ("unable to get time of day in arch_generate_id");

  /* ctime's text ends in a newline; cut it off in place. */
  stamp = ctime (&now);
  newline = str_chr_index (stamp, '\n');
  if (newline)
    *newline = 0;

  who = arch_my_id ();
  cvt_ulong_to_decimal (pid_digits, (unsigned long) getpid ());
  cvt_ulong_to_decimal (seq_digits, seq++);

  id = str_alloc_cat_many (0, who, " ", stamp, " ", pid_digits, ".", seq_digits, str_end);

  lim_free (0, who);
  return id;
}

/* Choose the explicit id to give `path'.
 *
 * precondition: path must be accessible
 *
 * If the tagline id of `path' exists and arch_id_indicates_changelog
 * accepts it, the result is that id with its first two characters
 * dropped.  Otherwise the result is a copy of `id' when one was
 * supplied, or a freshly generated id when `id' is 0.
 *
 * The result is newly allocated.
 */
t_uchar *
arch_choose_explicit_id (t_uchar * path, t_uchar * id)
{
  t_uchar * chosen;
  t_uchar * tagline_id = arch_inventory_id (arch_tagline_id_tagging, 0, path, 0, 0, 0);

  if (tagline_id && arch_id_indicates_changelog (tagline_id))
    {
      chosen = str_save (0, tagline_id + 2);
    }
  else if (id)
    {
      chosen = str_save (0, id);
    }
  else
    {
      chosen = arch_generate_id ();
    }

  lim_free (0, tagline_id);

  return chosen;
}

int
str_cmp_suffix_n(t_uchar * a_string, t_uchar * b_string, int length)
{
    int len_a = str_length(a_string);
    int len_b = str_length(b_string);
    int cmp_length = (len_a < len_b) ? len_a : len_b;
    cmp_length = (cmp_length < length) ? cmp_length : length;
    return str_cmp(a_string + len_a - cmp_length, b_string + len_b - cmp_length);
}

/* Return 1 if vu_lstat succeeds on `path', 0 otherwise.  The error
 * code of a failed lstat is discarded.
 */
static int
path_exists (char const * path)
{
    struct stat ignored;
    int errn;

    return vu_lstat (&errn, (char  *)path, &ignored) == 0;
}

/* Give `path' an explicit id and return that id (newly allocated).
 *
 * The id is selected by arch_choose_explicit_id from `id' and any
 * existing tagline id, then written with arch_add_explicit_id.
 *
 * Returns NULL without adding anything when
 *   - `path' does not exist (a message is printed on descriptor 2), or
 *   - arch_is_control_file accepts the tree-relative path and
 *     str_cmp_suffix_n reports that `path' does not end in
 *     ".arch-inventory".
 *
 * NOTE(review): the tree-relative path is derived by skipping
 * str_length (tree root) + 1 characters of the absolute path; this
 * presumes a tree root was found -- confirm for paths outside a tree.
 */
t_uchar *
arch_add_id (t_uchar * path, t_uchar * id)
{
  t_uchar * root;
  t_uchar * abs_path;
  t_uchar * tree_relative;
  t_uchar * chosen_id;
  int refuse;

  /* don't add ids for non existing paths */
  if (!path_exists (path))
    {
      safe_printfmt (2, "attempt to add missing path: %s\n", path);
      return NULL;
    }

  /* don't add ids for control paths */
  root = arch_tree_root(0, path, 0);
  abs_path = arch_abs_path (path);
  tree_relative = abs_path + str_length (root) + 1;
  lim_free (0, root);

  refuse = (arch_is_control_file (tree_relative, NULL)
            && str_cmp_suffix_n(path, ".arch-inventory", 15));
  lim_free (0, abs_path);

  if (refuse)
    return NULL;

  chosen_id = arch_choose_explicit_id (path, id);
  arch_add_explicit_id (path, chosen_id);

  return chosen_id;
}

/* Create the explicit id file for `path' containing `id' followed by
 * a newline, creating its directory first if needed.
 *
 * If the id file already exists, a message is printed on descriptor 2
 * and the process exits with status 2.
 */
void
arch_add_explicit_id (t_uchar * path, t_uchar * id)
{
  t_uchar * id_file = arch_explicit_id_file_for (path);
  t_uchar * id_dir;
  int out_fd;

  if (safe_access (id_file, F_OK) == 0)
    {
      safe_printfmt (2, "attempt to id already tagged file: %s\n", path);
      exit (2);
    }

  id_dir = file_name_directory_file (0, id_file);
  ensure_directory_exists (id_dir);

  out_fd = safe_open (id_file, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (out_fd, "%s\n", id);
  safe_close (out_fd);

  lim_free (0, id_file);
  lim_free (0, id_dir);
}


/* Remove the explicit id file of `path'.
 *
 * If there is no such id file, a message is printed on descriptor 2
 * and the process exits with status 2.
 */
void
arch_delete_explicit_id (t_uchar * path)
{
  t_uchar * id_file = arch_explicit_id_file_for (path);

  if (safe_access (id_file, F_OK))
    {
      safe_printfmt (2, "attempt to remove non-existent id for %s\n", path);
      exit (2);
    }

  safe_unlink (id_file);
  lim_free (0, id_file);
}


/* Rename the explicit id file of `from' to the location where the
 * explicit id file of `to' belongs, creating the destination
 * directory first if needed.
 */
void
arch_move_explicit_id (t_uchar * from, t_uchar * to)
{
  t_uchar * src_id_file = arch_explicit_id_file_for (from);
  t_uchar * dst_id_file = arch_explicit_id_file_for (to);
  t_uchar * dst_id_dir = file_name_directory_file (0, dst_id_file);

  ensure_directory_exists (dst_id_dir);
  safe_rename (src_id_file, dst_id_file);

  lim_free (0, src_id_file);
  lim_free (0, dst_id_file);
  lim_free (0, dst_id_dir);
}


/* Return the newly allocated path "<dir>/.arch-ids/=all", the file
 * holding the strong explicit default for `dir'.
 */
t_uchar *
arch_strong_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * const ids_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  t_uchar * const dflt_path = file_name_in_vicinity (0, ids_dir, "=all");

  lim_free (0, ids_dir);
  return dflt_path;
}

/* Return the newly allocated path "<dir>/.arch-ids/=default", the
 * file holding the weak explicit default for `dir'.
 */
t_uchar *
arch_weak_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * const ids_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  t_uchar * const dflt_path = file_name_in_vicinity (0, ids_dir, "=default");

  lim_free (0, ids_dir);
  return dflt_path;
}

/* Return the newly allocated path "<dir>/.arch-ids/=dont-care", the
 * marker file for the dont-care explicit default of `dir'.
 */
t_uchar *
arch_dont_care_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * const ids_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  t_uchar * const marker_path = file_name_in_vicinity (0, ids_dir, "=dont-care");

  lim_free (0, ids_dir);
  return marker_path;
}

/* Return non-zero if the dont-care marker file of `dir' exists
 * (safe_access with F_OK returns 0 for it), 0 otherwise.
 */
int
arch_is_dont_care_explicit_dflt_dir (t_uchar * dir)
{
  t_uchar * marker = arch_dont_care_explicit_dflt_file (dir);
  int marker_present = (safe_access (marker, F_OK) == 0);

  lim_free (0, marker);
  return marker_present;
}

/* Remove the strong explicit default file (".arch-ids/=all") of
 * directory `dir'.
 *
 * If that file does not exist, a message is printed on descriptor 2
 * and the process exits with status 2.
 */
void
arch_delete_strong_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_strong_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing strong explicit default in %s\n", dir);
      exit (2);
    }

  /* Unlink the default file itself, not the directory that holds it. */
  safe_unlink (file);
  lim_free (0, file);
}


/* Remove the weak explicit default file (".arch-ids/=default") of
 * directory `dir'.
 *
 * If that file does not exist, a message is printed on descriptor 2
 * and the process exits with status 2.
 */
void
arch_delete_weak_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_weak_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing weak explicit default in %s\n", dir);
      exit (2);
    }

  /* Unlink the default file itself, not the directory that holds it. */
  safe_unlink (file);
  lim_free (0, file);
}

/* Remove the dont-care marker file (".arch-ids/=dont-care") of
 * directory `dir'.
 *
 * If that file does not exist, a message is printed on descriptor 2
 * and the process exits with status 2.
 */
void
arch_delete_dont_care_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_dont_care_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing dont-care explicit default in %s\n", dir);
      exit (2);
    }

  /* Unlink the marker file itself, not the directory that holds it. */
  safe_unlink (file);
  lim_free (0, file);
}


/* Create the strong explicit default file of `dir' containing `id'
 * followed by a newline, creating its directory first if needed.
 *
 * If the file already exists, a message is printed on descriptor 2
 * and the process exits with status 2.
 */
void
arch_set_strong_explicit_default (t_uchar * dir, t_uchar * id)
{
  t_uchar * dflt_file = arch_strong_explicit_dflt_file (dir);
  t_uchar * dflt_dir = file_name_directory_file (0, dflt_file);
  int out_fd;

  if (safe_access (dflt_file, F_OK) == 0)
    {
      safe_printfmt (2, "attempt to overwrite strong explicit default in %s\n", dir);
      exit (2);
    }

  ensure_directory_exists (dflt_dir);

  out_fd = safe_open (dflt_file, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (out_fd, "%s\n", id);
  safe_close (out_fd);

  lim_free (0, dflt_file);
  lim_free (0, dflt_dir);
}

/* Create the weak explicit default file of `dir' containing `id'
 * followed by a newline, creating its directory first if needed.
 *
 * If the file already exists, a message is printed on descriptor 2
 * and the process exits with status 2.
 */
void
arch_set_weak_explicit_default (t_uchar * dir, t_uchar * id)
{
  t_uchar * dflt_file = arch_weak_explicit_dflt_file (dir);
  t_uchar * dflt_dir = file_name_directory_file (0, dflt_file);
  int out_fd;

  if (safe_access (dflt_file, F_OK) == 0)
    {
      safe_printfmt (2, "attempt to overwrite weak explicit default in %s\n", dir);
      exit (2);
    }

  ensure_directory_exists (dflt_dir);

  out_fd = safe_open (dflt_file, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (out_fd, "%s\n", id);
  safe_close (out_fd);

  lim_free (0, dflt_file);
  lim_free (0, dflt_dir);
}

/* Make sure the dont-care marker file of `dir' exists.
 *
 * If it is missing, its directory is created as needed and an empty
 * marker file is created.  If it is already there, nothing is
 * changed.
 */
void
arch_set_dont_care_explicit_default (t_uchar * dir)
{
  t_uchar * marker = arch_dont_care_explicit_dflt_file (dir);
  t_uchar * marker_dir = file_name_directory_file (0, marker);

  if (safe_access (marker, F_OK) != 0)
    {
      int out_fd;

      ensure_directory_exists (marker_dir);
      out_fd = safe_open (marker, O_WRONLY | O_CREAT | O_EXCL, 0666);
      safe_close (out_fd);
    }

  lim_free (0, marker);
  lim_free (0, marker_dir);
}




/*(c file_id)
 * static t_uchar * file_id (int * errn,
 *                           struct alloc_limits * limits,
 *                           enum ftag_method method,
 *                           int untagged_is_source,
 *                           t_uchar * path,
 *                           assoc_table id_tagging_shortcut,
 *                           struct stat * known_lstat,
 *                           assoc_table * explicit_skips);
 *
 * Return a newly allocated string containing the inventory id
 * of the file `path' using inventory method `method'.
 *
 * Return 0 and set `*errn' if the id can not be computed.
 * If no I/O error occurs but the file has no explicit id
 * and `method' is `ftag_explicit', `*errn' is set to 0.
 */
static t_uchar *
file_id (int * errn,
         struct alloc_limits * limits,
         enum ftag_method method,
         int untagged_is_source,
         t_uchar * path,
         assoc_table id_tagging_shortcut,
         struct stat * known_lstat,
         assoc_table * explicit_skips)
{
  t_uchar * answer = 0;
  t_uchar * as_file = 0;
  t_uchar * basename = 0;
  t_uchar * dir = 0;
  t_uchar * dir_as_file = 0;
  t_uchar * dir_basename = 0;
  t_uchar * id_file = 0;
  struct stat stat_buf;
  int is_dir;
  int is_symlink;

  if (!path)
    {
      *errn = EINVAL;

    return_answer:

      lim_free (limits, as_file);
      lim_free (limits, basename);
      lim_free (limits, dir);
      lim_free (limits, dir_as_file);
      lim_free (limits, dir_basename);
      lim_free (limits, id_file);

      return answer;
    }

  as_file = file_name_from_directory (limits, path);
  if (!as_file)
    {
    enomem_error:
      *errn = ENOMEM;
      goto return_answer;
    }

  if (method == ftag_names)
    {
      answer = str_alloc_cat (limits, "?", as_file);
      if (!answer)
        goto enomem_error;
      else
        goto return_answer;
    }


  basename = file_name_tail (limits, as_file);
  dir = file_name_directory (limits, as_file);
  if (!dir)
    dir = str_save (limits, ".");
  if (!(basename && dir))
    goto enomem_error;

  dir_as_file = file_name_from_directory (limits, dir);
  if (!dir_as_file)
    goto enomem_error;

  dir_basename = file_name_tail (limits, dir_as_file);
  if (!dir_basename)
    goto enomem_error;

  /* Explicit id files use their contents as id, with the
   * prefix 'E'.
   */
  if (!str_cmp (dir_basename, ".arch-ids"))
    {
      answer = explicit_id (errn, 0, limits, path, as_file, "E_", 0);
      goto return_answer;
    }

  /* Explicit id file directories:
   */
  if (!str_cmp (basename, ".arch-ids"))
    {
      long amt;
      answer = str_alloc_cat (limits, "D_", as_file);
      if (!answer)
        goto enomem_error;
      amt = smash_non_graphical (answer, str_length (answer));
      answer[amt] = 0;
      goto return_answer;
    }

  /* Paths beginning with "./{arch}" are tagged with their own
   * path name, with the prefix "A_".  The presumptions are that these
   * files never move, and that if a file is present, its contents are
   * invariant.
   */
  if (is_at_or_underneath_archdir (as_file))
    {
      long amt;
      answer = str_alloc_cat (limits, "A_", as_file);
      if (!answer)
        goto enomem_error;
      amt = smash_non_graphical (answer, str_length (answer));
      answer[amt] = 0;
      goto return_answer;
    }


  /* Try for an explicit id:
   */
  if (known_lstat)
    stat_buf = *known_lstat;
  else if (0 > vu_lstat (errn, as_file, &stat_buf))
    goto return_answer;

  if (S_ISDIR (stat_buf.st_mode))
    {
      is_dir = 1;
      is_symlink = 0;
      id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=id");
      if (!id_file)
        goto enomem_error;
    }
  else
    {
      is_dir = 0;
      is_symlink = S_ISLNK (stat_buf.st_mode);

      id_file = file_name_in_vicinity (limits, dir, ".arch-ids/");
      if (!id_file)
        goto enomem_error;
      id_file = str_realloc_cat (limits, id_file, basename);
      if (!id_file)
        goto enomem_error;
      id_file = str_realloc_cat (limits, id_file, ".id");
      if (!id_file)
        goto enomem_error;
    }

  *errn = 0;
  answer = explicit_id (errn, 0, limits, path, id_file, "x_", 0);
  if (answer || (*errn != ENOENT))
    goto return_answer;
  else
    {
      /* Is there a .arch-ids/=all file here?
       */
      lim_free (limits, id_file);
      if (is_dir)
        id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=all");
      else
        id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=all");

      if (!id_file)
        goto enomem_error;

      *errn = 0;
      answer = explicit_id (errn, explicit_skips, limits, path, id_file, "a_", (is_dir ? (t_uchar *)"./." : basename));
      if (answer || (*errn != ENOENT))
        goto return_answer;

      if ((method == ftag_implicit) && !is_dir && !is_symlink)
        {
          *errn = 0;
          answer = implicit_id (errn, limits, path, basename, "i_", &stat_buf, id_tagging_shortcut);
          if (answer || *errn)
            goto return_answer;
        }

      if ((method == ftag_tagline) && !is_dir && !is_symlink)
        {
          *errn = 0;
          answer = implicit_id (errn, limits, path, 0, "i_", &stat_buf, id_tagging_shortcut);
          if (answer || *errn)
            goto return_answer;
        }


      /* is there an "=default" id?
       */
      lim_free (limits, id_file);
      if (is_dir)
        id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=default");
      else
        id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=default");

      if (!id_file)
        goto enomem_error;

      *errn = 0;
      answer = explicit_id (errn, explicit_skips, limits, path, id_file, "w_", (is_dir ?  (t_uchar *)"./." : basename));
      if (answer || (*errn != ENOENT))
        goto return_answer;

      /* no explicit, =all, implicit, tagline, or =default id.
       */
      if (untagged_is_source)
        {
          int skipped;

          lim_free (limits, id_file);
          id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=dont-care");
          if (!id_file)
            goto enomem_error;

          skipped = (explicit_skips && !!assoc_ref (*explicit_skips, id_file));

          if (!skipped && (0 <= vu_lstat (errn, id_file, &stat_buf)))
            {
              long amt;
              answer = str_alloc_cat (limits, "k_", as_file);
              if (!answer)
                goto enomem_error;
              amt = smash_non_graphical (answer, str_length (answer));
              answer[amt] = 0;
              goto return_answer;
            }

          if (!skipped && explicit_skips)
            assoc_set (explicit_skips, id_file, "yes");

          if (*errn == ENOENT)
            {
              long amt;
              answer = str_alloc_cat (limits, "?_", as_file);
              if (!answer)
                goto enomem_error;
              amt = smash_non_graphical (answer, str_length (answer));
              answer[amt] = 0;
              goto return_answer;
            }
          else
            goto return_answer;
        }
      else
        {
          *errn = 0;
          goto return_answer;
        }
    }

}



/* Read an explicit inventory id from the id file `path'.
 *
 * This is `explicit_id' called with no skip table, no allocation
 * limits, no associated file name, the prefix "x_" and no postfix.
 * The result (or 0, with `*errn' set by `explicit_id') is passed
 * straight back to the caller.
 */
t_uchar *
arch_id_from_explicit_file (int *errn, t_uchar * path)
{
  t_uchar * id;

  id = explicit_id (errn, 0, 0, 0, path, "x_", 0);

  return id;
}

/* Return the result of matching `rel_file' against the pattern
 * for "{arch}" control paths (see the regexp below).
 *
 * The pattern is compiled on the first call and kept in function
 * static storage for all later calls.
 */
static int
is_at_or_underneath_archdir (char * rel_file)
{
  static regex_t archdir_pattern = {0,};
  static int archdir_pattern_ready = 0;

  if (archdir_pattern_ready)
    return filename_matches (&archdir_pattern, rel_file);

  {
    int re_error;

    re_error = regcomp (&archdir_pattern,
                        "^(.*/)?(\\{arch\\}(/[a-zA-Z=][^/~]*)*|\\{arch\\}/\\.arch-project-tree)$",
                        REG_EXTENDED);
    invariant (!re_error);
    archdir_pattern_ready = 1;
  }

  return filename_matches (&archdir_pattern, rel_file);
}

/* Run the compiled regexp `pattern' over `filename'.
 *
 * Returns 1 on a match and 0 on no match.  Any other result from
 * regexec is treated as fatal and reported through `panic'; the
 * trailing `return -1' is only reached if `panic' returns.
 */
static int
filename_matches (regex_t * pattern, char * filename)
{
  const int status = regexec (pattern, filename, 0, 0, 0);

  if ((status != REG_NOMATCH) && (status != REG_NOERROR))
    {
      panic ("unexpected regexec error in arch_inventory_traversal");
      return -1;
    }

  return (status == REG_NOERROR) ? 1 : 0;
}


/* Build an inventory id from the first line of the file `id_file'.
 *
 * The result is a newly allocated string made of `prefix', the
 * first line of `id_file' (without its newline) and, if `postfix'
 * is not 0, `postfix'.  The file is read in chunks of up to 1024
 * bytes; each chunk is passed through `smash_non_graphical', so
 * trailing non-graphical characters of a chunk are dropped and the
 * remaining non-graphical characters become '_'.
 *
 * `skips', if not 0, is a table of id files already known to be
 * unopenable: a hit returns 0 with `*errn' set to ENOENT without
 * touching the file system, and a failed open is recorded in it.
 *
 * `arg_file' is not used by the active code (only by the block
 * disabled with DISABLED_NEW_CODE).
 *
 * Returns 0 on failure with `*errn' set: the error from the failed
 * open or read, or ENOMEM if a string could not be allocated.
 * Every failure path closes the id file and releases the partial
 * answer.
 */
static t_uchar *
explicit_id (int * errn,
              assoc_table * skips,
              struct alloc_limits * limits,
              t_uchar * arg_file,
              t_uchar * id_file,
              t_uchar * prefix,
              t_uchar * postfix)
{
  int id_fd;
  t_uchar * answer;
  t_uchar * grown;
  char buf[1024];
  long amt;
  int ign;


  if (skips && assoc_ref (*skips, id_file))
    {
      *errn = ENOENT;
      return 0;
    }

#if DISABLED_NEW_CODE
  if (id_tagging_shortcut)
    {
      struct stat stat_buf;
      /* prefix determines action..
       * E_ - it's a .arch-ids file we are examinig
       * x_ - it's a normal file that /may/ have an id file
       */
      /* id_file is always the (prospective) id file */
      /* TODO?: Cache the inode sig of the id file */
      t_uchar * signature;
      t_uchar * cached_id;
      
      if (statb)
      stat_buf = *statb;
      else if (0 > vu_lstat (errn, arg_file, &stat_buf))
      return 0;
      
      signature = arch_statb_inode_sig (&stat_buf);
      cached_id = assoc_ref (id_tagging_shortcut, signature);
      lim_free (0, signature);

      if (cached_id) 
      {
        if (!strcmp (arg_file, id_file))
          {
            if ((cached_id[0] == 'E') && (cached_id[1] == '_'))
            return str_save (0, cached_id);
          }
        else if ((cached_id[0] == 'x') && (cached_id[1] == '_'))
          {
            /* TODO: we could save 1 stat per .id file if we cached the results of the lookups somewhere - 
             * .arch-ids is read before the files in the dir.
             */
            t_uchar *id_tag = explicit_id (errn, skips, limits, id_file, id_file, "E_", 0, 0, id_tagging_shortcut);
            if (id_tag && (!strcmp (id_tag + 1, cached_id + 1)))
            {
              lim_free (0, id_tag);
              return str_save (0, cached_id);
            }
            else
            lim_free (0, id_tag);
          }
      }
    }
#endif

  id_fd = vu_open (errn, id_file, O_RDONLY, 0);

  if (id_fd < 0)
    {
      /* Remember the failure so the next lookup of this id file
       * can be answered from `skips'.
       */
      if (skips)
        assoc_set (skips, id_file, "yes");
      return 0;
    }

  answer = str_save (limits, prefix);
  if (!answer)
    goto enomem_error;

  while (1)
    {
      t_uchar * eol;

      amt = vu_read_retry (errn, id_fd, buf, sizeof (buf));

      if (amt < 0)
        {
          /* `*errn' was set by the failed read.
           */
          lim_free (limits, answer);
          vu_close (&ign, id_fd);
          return 0;
        }

      if (!amt)
        break;

      /* Only the first line counts: stop at the newline if this
       * chunk has one, otherwise take the whole chunk and go on.
       */
      eol = str_chr_index_n (buf, amt, '\n');
      if (eol)
        amt = eol - (t_uchar *)buf;

      amt = smash_non_graphical (buf, amt);

      /* Grow into a temporary so the old buffer can still be
       * released if growing fails (this assumes, as with realloc,
       * that a failed str_realloc_cat_n leaves its input valid).
       */
      grown = str_realloc_cat_n (limits, answer, buf, amt);
      if (!grown)
        goto enomem_error;
      answer = grown;

      if (eol)
        break;
    }

  grown = str_realloc_cat (limits, answer, (postfix ? postfix : (t_uchar *)""));
  if (!grown)
    goto enomem_error;
  answer = grown;

  vu_close (&ign, id_fd);
  return answer;

 enomem_error:
  /* Allocation failure after the id file was opened: release the
   * partial answer (if any) and the descriptor.
   */
  if (answer)
    lim_free (limits, answer);
  vu_close (&ign, id_fd);
  *errn = ENOMEM;
  return 0;
}


/* Look inside the file `file' for an embedded inventory id.
 *
 * `basename' selects the syntax: non-0 means the old larch-style
 * syntax, 0 means the tagline syntax (both are described below).
 * On success the result is a newly allocated string made of
 * `prefix' followed by the id text, after that text has been
 * passed through `smash_non_graphical'.
 *
 * If `id_tagging_shortcut' is not 0, the inode signature of
 * `*statb' is looked up in it first: a cached id beginning with
 * "i_" is returned as a copy without reading the file, and any
 * other cached id makes the function return 0 at once (without
 * modifying `*errn').
 *
 * Returns 0 with `*errn' set to 0 when the file was searched and
 * holds no id.  Returns 0 with `*errn' set to the error on an I/O
 * failure (including a failed close) and to ENOMEM when the result
 * can not be allocated.
 */
static t_uchar *
implicit_id (int * errn,
              struct alloc_limits * limits,
              t_uchar * file,
              t_uchar * basename,
              t_uchar * prefix,
              struct stat * statb,
              assoc_table id_tagging_shortcut)
{
  int file_fd;
  struct stat file_stat_buf;
  char buf[1025];
  int amt;
  int line;
  int bottom;
  int ign;

  if (id_tagging_shortcut)
    {
      t_uchar * signature = arch_statb_inode_sig (statb);
      t_uchar * cached_id = assoc_ref (id_tagging_shortcut, signature);

      lim_free (0, signature);
      if (cached_id && (cached_id[0] == 'i') && (cached_id[1] == '_'))
        return str_save (0, cached_id);
      else if (cached_id)
        return 0;
    }

  /* This is a slightly screwy, historic interface.
   *
   * Passing `basename != 0' means the old, larch-style tag syntax.
   *
   * Passing `basename == 0' means tagline syntax.
   */

  /* Search the file itself (last, then first 1K) for a line beginning:
   *
   * tla-style tagline id tagging (basename == 0)
   * -----------------------------------------
   *
   * <punct>arch-tag:<blanks>
   *
   *
   * larch-style implicit id tagging (basename != 0)
   * --------------------------------------------
   *
   * <punct>basename<blanks>-
   *
   * or
   *
   * <punct>tag:<blanks>
   *
   * after the dash, skip any blanks -- the rest is the id.
   */

  file_fd = vu_open (errn, file, O_RDONLY, 0);
  if (file_fd < 0)
    return 0;

  if (0 > vu_fstat (errn, file_fd, &file_stat_buf))
    goto error_return;

  /* Pass 1 (bottom == 1) scans the tail of the file, pass 2
   * (bottom == 0) scans its head.
   */
  for (bottom = 1; bottom >= 0; --bottom)
    {
      if (!bottom)
        {
          if (0 > vu_lseek (errn, file_fd, 0, SEEK_SET))
            goto error_return;
          amt = vu_read_retry (errn, file_fd, buf, sizeof (buf) - 1);
          if (amt < 0)
            goto error_return;
        }
      else
        {
          char * x;

          /* A file that fits in the buffer is covered completely
           * by the head pass; skip the tail pass for it.
           */
          if (!(file_stat_buf.st_size > sizeof (buf)))
            continue;
          /* Yes, this is a off by one error. However changing it
           * breaks existing file-ids
           */
          if (0 > vu_lseek (errn, file_fd, -1026, SEEK_END))
            goto error_return;
          amt = vu_read_retry (errn, file_fd, buf, sizeof (buf));
          if (amt < 0)
            goto error_return;
          /* The read most likely starts in the middle of a line:
           * throw away everything up to and including the first
           * newline.  That also guarantees room for the 0 below.
           */
          x = str_chr_index_n (buf, amt, '\n');
          if (!x)
            continue;
          amt = amt - (1 + x - buf);
          mem_move (buf, x + 1, amt);
        }

      buf[amt] = 0;
      line = 0;

      while (1)
        {
          int is_inventory_id;

          /* skip punctuation and blanks at the start of the line
           */
          while ((line < amt) && (char_is_punct (buf[line]) || char_is_blank (buf[line])))
            ++line;

          if (line == amt)
            break;

          if (buf[line] == '\n')
            {
              ++line;
              continue;
            }

          is_inventory_id = (basename ? !str_cmp_prefix ("tag:", buf + line) : !str_cmp_prefix ("arch-tag:", buf + line));

          if (   !is_inventory_id
              && (!basename || str_cmp_prefix (basename, buf + line)))
            {
              t_uchar * eol;

              /* Advance to the end of this line; the newline itself
               * is consumed at the top of the loop.
               */
            not_this_line:
              eol = str_chr_index_n (buf + line, amt - line, '\n');
              if (!eol)
                break;
              line = eol - (t_uchar *)buf;
            }
          else
            {
              t_uchar * eol;

              if (is_inventory_id)
                line += (basename ? str_length ("tag:") : str_length ("arch-tag:"));
              else
                line += str_length (basename);

              if (!is_inventory_id)
                {
                  /* larch-style "basename -": blanks, then a dash.
                   */
                  while ((line < amt) && char_is_blank (buf[line]))
                    ++line;

                  if (line == amt)
                    break;

                  if (buf[line] != '-')
                    goto not_this_line;

                  ++line;
                }

              if (line == amt)
                break;

              /* This is the tag line.
               */
              while ((line < amt) && char_is_blank (buf[line]))
                ++line;

              eol = str_chr_index_n (buf + line, amt - line, '\n');
              if (!eol)
                /* end of buffer no eol */
                {
                  eol = buf + amt;
                  if (!bottom)
                    if (file_stat_buf.st_size > sizeof (buf))
                      safe_printfmt (2, "Warning: top-of-file truncated tag in: %s\n", file);
                }

              if (0 == (eol - (t_uchar *)(buf + line)))
                {
                  /* an empty id
                   */
                  break;
                }

              {
                long size;
                t_uchar * answer;

                size = smash_non_graphical (buf + line, eol - (t_uchar *)(buf + line));
                answer = str_alloc_cat_n (limits, prefix, buf + line, size);
                if (0 > vu_close (errn, file_fd))
                  {
                    /* `*errn' holds the close error.  Release the id
                     * (it would otherwise leak) and do not close the
                     * descriptor a second time.
                     */
                    if (answer)
                      lim_free (limits, answer);
                    return 0;
                  }
                if (!answer)
                  *errn = ENOMEM;
                return answer;
              }
            }
        }
    }

  /* Nothing found in either pass.  A failed close is reported as
   * is; the descriptor is not closed again.
   */
  if (0 > vu_close (errn, file_fd))
    return 0;
  *errn = 0;
  return 0;

 error_return:
  /* I/O failure while the file is still open: `*errn' is already
   * set, so the result of this close is deliberately ignored.
   */
  vu_close (&ign, file_fd);
  return 0;
}



/* Sanitize the first `amt' bytes of `buf' in place for use as
 * inventory id text.
 *
 * Trailing bytes for which `char_is_graph' is false are dropped
 * from the count; every remaining such byte is overwritten with
 * '_'.  Returns the new, possibly shorter, length.  No terminating
 * 0 is stored: callers cut the string themselves if they need to.
 */
static long
smash_non_graphical (t_uchar * buf, long amt)
{
  long kept = amt;
  long pos;

  /* Back up over the non-graphical tail.
   */
  while ((kept > 0) && !char_is_graph (buf[kept - 1]))
    --kept;

  /* Whatever non-graphical bytes are left become underscores.
   */
  for (pos = 0; pos < kept; ++pos)
    if (!char_is_graph (buf[pos]))
      buf[pos] = '_';

  return kept;
}


/* tag: Tom Lord Wed May 14 07:20:26 2003 (inv-tags.c)
 */

Generated by  Doxygen 1.6.0   Back to index