Logo Search packages:      
Sourcecode: bazaar version File versions

invent.c

/* invent.c: project tree inventory library routines
 *
 ****************************************************************
 * Copyright (C) 2002, 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/bugs/panic.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/os/errno-to-string.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/arrays/ar.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "libarch/inode-sig.h"
#include "libarch/inv-ids.h"
#include "libarch/invent.h"



/* Per-directory inventory patterns, filled in by read_directory_regexps
 * when a directory contains a `.arch-inventory' file.
 *
 * `regexps' has one slot for every regex_t-sized member of
 * struct arch_inventory_regexps.  The traversal code treats a null slot
 * as "no per-directory pattern for this category".
 *
 * NOTE(review): presumably the non-null slots point into `storage' --
 * confirm against read_directory_regexps.
 */
struct directory_regexps
{
  regex_t * regexps[sizeof (struct arch_inventory_regexps) / sizeof (regex_t)];
  struct arch_inventory_regexps storage;
  /* DIR_REGEXP (dir_re, junk) names the slot that corresponds to the
   * member `junk_pattern' of struct arch_inventory_regexps: the member's
   * byte offset divided by sizeof (regex_t) is used as the slot index.
   */
#define DIR_REGEXP(dir_re, name) \
  (dir_re)->regexps[offsetof (struct arch_inventory_regexps, name##_pattern) / sizeof (regex_t)]
};


/* __STDC__ prototypes for static functions
 *
 * Forward declarations for the file-local helpers, so that they may be
 * used before the point in this file where they are defined.
 */
static void copy_options_but_regexps (struct arch_inventory_options * dest,
                                      const struct arch_inventory_options * src);
static void source_inventory_callback (t_uchar * path,
                                       struct stat * statb,
                                       enum arch_inventory_category category,
                                       t_uchar * id,
                                       int has_source_name,
                                       void * closure,
                                       int escape_classes);
static void source_inventory_files_callback (t_uchar * path,
                                             struct stat * statb,
                                             enum arch_inventory_category category,
                                             t_uchar * id,
                                             int has_source_name,
                                             void * closure,
                                             int escape_classes);
static void arch_inventory_traversal_internal (struct arch_inventory_options * options,
                                               t_uchar * root,
                                               inv_callback callback,
                                               void * closure,
                                               assoc_table id_tagging_shortcut,
                                               assoc_table * explicit_skips,
                                               int escape_classes);
static int cmp_files (const void * va, const void * vb);
static int right_order_for_recursion (char * a, char * b);
static int contains_illegal_character (char * filename);
static int filename_matches (regex_t * pattern, char * filename);
static int is_nested_tree (char * path);
static int is_comment_line (t_uchar * line, long len);
static int sets_re (char * kw, char ** re, t_uchar * line, long len);
static int sets_id_tagging_method (char * kw,
                                   enum arch_id_tagging_method * method_var,
                                   enum arch_inventory_category * untagged_category_var,
                                   enum arch_id_tagging_method method,
                                   enum arch_inventory_category untagged_category,
                                   t_uchar * line, long len);
static int sets_untagged_source_disposition (enum arch_inventory_category * untagged_category_var,
                                             t_uchar * saved_line, long saved_len);
static void read_directory_regexps (struct directory_regexps * regexps, char * dir_name);
static void free_directory_regexps (struct directory_regexps * regexps);



/* Build a table of (path, id) records for the source inventory of the
 * project tree rooted at `tree_root'.
 *
 * include_ctl      -- non-zero: do not apply the `excludes' patterns
 * include_precious -- non-zero: also report precious files
 * include_nested   -- non-zero: descend into nested project trees
 *
 * The traversal is run with the tree root as the current directory, so
 * the recorded paths are relative to "."; the previous working
 * directory is restored before returning.
 */
rel_table
arch_source_inventory (t_uchar * tree_root, int include_ctl, int include_precious, int include_nested)
{
  struct arch_inventory_options opts;
  rel_table inventory = 0;
  int saved_cwd_fd;

  /* remember where we are, then work from inside the tree
   */
  saved_cwd_fd = safe_open (".", O_RDONLY, 0);
  safe_chdir (tree_root);

  mem_set0 ((t_uchar *)&opts, sizeof (opts));
  opts.want_ids = 1;
  opts.nested = include_nested;
  opts.include_excluded = (include_ctl != 0);
  opts.method = arch_names_id_tagging; /* default only */
  opts.categories = arch_inventory_source;
  if (include_precious)
    opts.categories |= arch_inventory_precious;

  arch_get_inventory_naming_conventions (&opts, ".");
  arch_inventory_traversal (&opts, ".", source_inventory_callback, (void *)&inventory, 0);
  arch_free_inventory_naming_conventions (&opts);

  /* go back to the directory we started in
   */
  safe_fchdir (saved_cwd_fd);
  safe_close (saved_cwd_fd);

  return inventory;
}

/* Like arch_source_inventory, but directories are left out of the
 * resulting (path, id) table and nested project trees are never
 * entered.
 *
 * include_ctl      -- non-zero: do not apply the `excludes' patterns
 * include_precious -- non-zero: also report precious files
 *
 * The traversal is run with the tree root as the current directory; the
 * previous working directory is restored before returning.
 */
rel_table
arch_source_files_inventory (t_uchar * tree_root, int include_ctl, int include_precious)
{
  struct arch_inventory_options opts;
  rel_table inventory = 0;
  int saved_cwd_fd;

  saved_cwd_fd = safe_open (".", O_RDONLY, 0);
  safe_chdir (tree_root);

  mem_set0 ((t_uchar *)&opts, sizeof (opts));
  opts.want_ids = 1;
  opts.nested = 0;
  opts.include_excluded = (include_ctl != 0);
  opts.method = arch_names_id_tagging; /* default only */
  opts.categories = arch_inventory_source;
  if (include_precious)
    opts.categories |= arch_inventory_precious;

  arch_get_inventory_naming_conventions (&opts, ".");
  arch_inventory_traversal (&opts, ".", source_inventory_files_callback, (void *)&inventory, 0);
  arch_free_inventory_naming_conventions (&opts);

  safe_fchdir (saved_cwd_fd);
  safe_close (saved_cwd_fd);

  return inventory;
}


/* Copy the scalar settings of `src' into `dest'.
 *
 * The compiled patterns (the `regexps' member) are deliberately not
 * copied.  Note that `untagged_source_category' is not copied either.
 */
static void
copy_options_but_regexps (struct arch_inventory_options * dest,
                          const struct arch_inventory_options * src)
{
  /* what to report
   */
  dest->categories = src->categories;
  dest->want_ids = src->want_ids;
  dest->include_excluded = src->include_excluded;
  dest->treat_unrecognized_source_as_source = src->treat_unrecognized_source_as_source;

  /* how to traverse and tag
   */
  dest->nested = src->nested;
  dest->method = src->method;
  dest->override_method = src->override_method;
}


/* Inventory callback: append a (path, id) record to the rel_table that
 * `closure' points to.  The remaining arguments are not used.
 */
static void
source_inventory_callback (t_uchar * path,
                           struct stat * statb,
                           enum arch_inventory_category category,
                           t_uchar * id,
                           int has_source_name,
                           void * closure,
                           int escape_classes)
{
  rel_table * table = (rel_table *)closure;

  rel_add_records (table, rel_make_record (path, id, 0), 0);
}



/* Inventory callback: append a (path, id) record to the rel_table that
 * `closure' points to, unless `statb' describes a directory.
 */
static void
source_inventory_files_callback (t_uchar * path,
                                 struct stat * statb,
                                 enum arch_inventory_category category,
                                 t_uchar * id,
                                 int has_source_name,
                                 void * closure,
                                 int escape_classes)
{
  rel_table * table = (rel_table *)closure;

  /* directories are not reported
   */
  if (S_ISDIR (statb->st_mode))
    return;

  rel_add_records (table, rel_make_record (path, id, 0), 0);
}




/* Return a freshly allocated (str_save) copy of the default naming
 * convention regexp for inventory category `cat'.
 *
 * Panics if `cat' is not one of the six categories handled here.
 */
t_uchar *
arch_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  char * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = ".";
      break;

    case arch_inventory_precious:
      pattern = "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS|tags|cscope.*\\.out|\\.svn)$";
      break;

    case arch_inventory_backup:
      pattern = "^.*(~|\\.~[0-9]+~|\\.bak|\\.swp|\\.orig|\\.rej|\\.original|\\.modified|\\.reject|\\.(o|a|so|core|so(\\.[[:digit:]]+)*))$|^core$";
      break;

    case arch_inventory_junk:
      pattern = "^(,.*)$";
      break;

    case arch_inventory_unrecognized:
      pattern = "^$";
      break;

    case arch_inventory_excludes:
      pattern = "^(.arch-ids|\\{arch\\}|\\.arch-inventory)$";
      break;

    default:
      panic ("unrecognized inventory category (arch_default_naming_conventions_regexp)");
      return 0;                 /* not reached */
    }

  return str_save (0, pattern);
}


/* DO NOT under ANY circumstances change these patterns.  They predate
 * pre-populating tagging-method and will break old archives if altered.
 *
 * Return a freshly allocated (str_save) copy of the historical default
 * regexp for inventory category `cat'.  Panics if `cat' is not one of
 * the six categories handled here.
 */
t_uchar *
arch_ancient_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  char * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = "^([_=a-zA-Z0-9].*|\\.arch-ids|\\{arch\\}|\\.arch-project-tree)$";
      break;

    case arch_inventory_precious:
      pattern = "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS)$";
      break;

    case arch_inventory_backup:
      pattern = "^.*(~|\\.~[0-9]+~|\\.bak|\\.orig|\\.rej|\\.original|\\.modified|\\.reject)$";
      break;

    case arch_inventory_junk:
      pattern = "^(,.*)$";
      break;

    case arch_inventory_unrecognized:
      pattern = "^(.*\\.(o|a|so|core)|core)$";
      break;

    case arch_inventory_excludes:
      pattern = "^(.arch-ids|\\{arch\\}|\\.arch-inventory)$";
      break;

    default:
      panic ("unrecognized inventory category (arch_ancient_default_naming_conventions_regexp)");
      return 0;                 /* not reached */
    }

  return str_save (0, pattern);
}


/* Return a freshly allocated (str_save) copy of the "null" regexp for
 * inventory category `cat': the source pattern is ".*", the excludes
 * pattern names only the arch control directories, and every other
 * category gets "^$".
 *
 * Panics if `cat' is not one of the six categories handled here.
 */
t_uchar *
arch_null_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  char * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = ".*";
      break;

    case arch_inventory_precious:
    case arch_inventory_backup:
    case arch_inventory_junk:
    case arch_inventory_unrecognized:
      pattern = "^$";
      break;

    case arch_inventory_excludes:
      pattern = "^(.arch-ids|\\{arch\\})$";
      break;

    default:
      panic ("unrecognized inventory category (arch_null_default_naming_conventions_regexp)");
      return 0;                 /* not reached */
    }

  return str_save (0, pattern);
}


void
arch_get_inventory_naming_conventions (struct arch_inventory_options * options,
                                       char * tree_root)
{
  char * excludes = 0;
  char * junk = 0;
  char * backup = 0;
  char * precious = 0;
  char * unrecognized = 0;
  char * source = 0;
  int re_error;

  if (tree_root)
    {
      t_uchar * id_tagging_method_file;

      id_tagging_method_file = arch_tree_id_tagging_method_file (tree_root);

      if (!safe_access (id_tagging_method_file, F_OK))
        {
          int in_fd;
          t_uchar * line;
          long len;
          enum arch_id_tagging_method tree_method = arch_names_id_tagging;
          enum arch_inventory_category untagged_source_category = arch_inventory_precious;

          in_fd = safe_open (id_tagging_method_file, O_RDONLY, 0);

          while (1)
            {
              safe_next_line (&line, &len, in_fd);
              if (!len)
                break;

              (void)(!is_comment_line (line, len)
                     && !sets_id_tagging_method ("implicit", &tree_method, &untagged_source_category, arch_implicit_id_tagging, arch_inventory_source, line, len)
                     && !sets_id_tagging_method ("tagline", &tree_method, &untagged_source_category, arch_tagline_id_tagging, arch_inventory_source, line, len)
                     && !sets_id_tagging_method ("explicit", &tree_method, &untagged_source_category, arch_explicit_id_tagging, arch_inventory_precious, line, len)
                     && !sets_id_tagging_method ("names", &tree_method, &untagged_source_category, arch_names_id_tagging, arch_inventory_source, line, len)

                     && !sets_untagged_source_disposition (&untagged_source_category, line, len)

                     && !sets_re ("exclude", &excludes, line, len)
                     && !sets_re ("junk", &junk, line, len)
                     && !sets_re ("backup", &backup, line, len)
                     && !sets_re ("precious", &precious, line, len)
                     && !sets_re ("unrecognized", &unrecognized, line, len)
                     && !sets_re ("source", &source, line, len));
            }

          safe_close (in_fd);
          if (!options->override_method)
            {
              options->method = tree_method;
              options->untagged_source_category = untagged_source_category;
            }
        }

      /* default naming conventions.
       */
      if (excludes == NULL)
      excludes = arch_ancient_default_naming_conventions_regexp (arch_inventory_excludes);
      if (junk == NULL)
      junk = arch_ancient_default_naming_conventions_regexp (arch_inventory_junk);
      if (backup == NULL)
      backup = arch_ancient_default_naming_conventions_regexp (arch_inventory_backup);
      if (precious == NULL)
      precious = arch_ancient_default_naming_conventions_regexp (arch_inventory_precious);
      if (unrecognized == NULL)
      unrecognized = arch_ancient_default_naming_conventions_regexp (arch_inventory_unrecognized);
      if (source == NULL)
      source = arch_ancient_default_naming_conventions_regexp (arch_inventory_source);

      lim_free (0, id_tagging_method_file);
    }
  else
    {
      excludes = arch_null_default_naming_conventions_regexp (arch_inventory_excludes);
      junk = arch_null_default_naming_conventions_regexp (arch_inventory_junk);
      backup = arch_null_default_naming_conventions_regexp (arch_inventory_backup);
      precious = arch_null_default_naming_conventions_regexp (arch_inventory_precious);
      unrecognized = arch_null_default_naming_conventions_regexp (arch_inventory_unrecognized);
      source = arch_null_default_naming_conventions_regexp (arch_inventory_source);
    }

  /* compile the conventions.
   */

  re_error = regcomp (&options->regexps.excludes_pattern, excludes, REG_EXTENDED);
  if (re_error)
    panic ("bogus id-tagging-method regexp for `excludes'");

  re_error = regcomp (&options->regexps.junk_pattern, junk, REG_EXTENDED);
  if (re_error)
    panic ("bogus id-tagging-method regexp for `junk'");

  re_error = regcomp (&options->regexps.backup_pattern, backup, REG_EXTENDED);
  if (re_error)
    panic ("bogus id-tagging-method regexp for `backup'");

  re_error = regcomp (&options->regexps.precious_pattern, precious, REG_EXTENDED);
  if (re_error)
    panic ("bogus id-tagging-method regexp for `precious'");

  re_error = regcomp (&options->regexps.unrecognized_pattern, unrecognized, REG_EXTENDED);
  if (re_error)
    panic ("bogus id-tagging-method regexp for `unrecognized'");

  re_error = regcomp (&options->regexps.source_pattern, source, REG_EXTENDED);
  if (re_error)
    panic ("bogus id-tagging-method regexp for `source'");


  lim_free (0, excludes);
  lim_free (0, junk);
  lim_free (0, backup);
  lim_free (0, precious);
  lim_free (0, unrecognized);
  lim_free (0, source);
}


/* Release the six compiled patterns stored in options->regexps (the
 * ones compiled by arch_get_inventory_naming_conventions).  `options'
 * itself is not freed.
 */
void
arch_free_inventory_naming_conventions (struct arch_inventory_options * options)
{
  regex_t * compiled[6];
  int i;

  compiled[0] = &options->regexps.excludes_pattern;
  compiled[1] = &options->regexps.junk_pattern;
  compiled[2] = &options->regexps.backup_pattern;
  compiled[3] = &options->regexps.precious_pattern;
  compiled[4] = &options->regexps.unrecognized_pattern;
  compiled[5] = &options->regexps.source_pattern;

  for (i = 0; i < 6; ++i)
    regfree (compiled[i]);
}


/* Walk the tree below `root', invoking `callback' (with `closure' and
 * `escape_classes' passed through) according to `options'.
 *
 * For the implicit and tagline tagging methods an id shortcut table is
 * first read for `root' and handed to the traversal.  Both that table
 * and the table of explicit skips that the traversal may build are
 * freed before returning.
 */
void
arch_inventory_traversal (struct arch_inventory_options * options,
                          t_uchar * root,
                          inv_callback callback,
                          void * closure,
                          int escape_classes)
{
  assoc_table shortcut = 0;
  assoc_table skips = 0;

  switch (options->method)
    {
    case arch_implicit_id_tagging:
    case arch_tagline_id_tagging:
      arch_read_id_shortcut (&shortcut, root);
      break;

    default:
      break;
    }

  arch_inventory_traversal_internal (options, root, callback, closure, shortcut, &skips, escape_classes);

  free_assoc_table (shortcut);
  free_assoc_table (skips);
}


/* Inventory the directory `root' and, recursively, its source
 * subdirectories.
 *
 * options             -- categories to report, tagging method, compiled
 *                        naming-convention patterns and traversal flags
 * root                -- directory to read
 * callback, closure   -- invoked for every entry whose category is
 *                        selected by options->categories
 * id_tagging_shortcut -- passed through to arch_inventory_id
 * explicit_skips      -- passed through to arch_inventory_id
 * escape_classes      -- passed through to `callback'
 *
 * The entries of `root' are sorted with cmp_files and classified one by
 * one.  Directories classified as source (and nested trees, when
 * options->nested is set) are recursed into; the recursion is postponed
 * whenever a sibling still has to be reported first (see the comment
 * ahead of the main loop).  A `.arch-inventory' entry in `root' causes
 * per-directory patterns to be read, which are consulted ahead of the
 * patterns in `options'.
 */
static void
arch_inventory_traversal_internal (struct arch_inventory_options * options,
                                   t_uchar * root,
                                   inv_callback callback,
                                   void * closure,
                                   assoc_table id_tagging_shortcut,
                                   assoc_table * explicit_skips,
                                   int escape_classes)
{
  DIR * dir;
  char ** files = 0;
  int n_files;
  int deferred_recursions_head;
  int deferred_recursions_tail;
  int * deferred_recursions = 0;
  int * is_deferred_nested = 0;
  char * rel_file = 0;
  struct directory_regexps * dir_regexps = 0;
  int x;

  safe_opendir (&dir, root);

  files = 0;
  n_files = 0;

  while (1)
    {
      char * file;

      safe_readdir (&file, dir);
      if (!file)
        break;
      *(char **)ar_push ((void **)&files, 0, sizeof (char *)) = file;
      ++n_files;

      /* look for per-directory inventory regexps
       */
      if (dir_regexps == 0 && (file[0] == '.') && !str_cmp(".arch-inventory", file))
        {
          dir_regexps = lim_malloc (0, sizeof *dir_regexps);
          read_directory_regexps (dir_regexps, root);
        }
    }

  safe_closedir (dir);

  qsort ((void *)files, n_files, sizeof (char *), cmp_files);

  /* We want to invoke `callback' on a lexically sorted list of paths.
   * Suppose that "foo" is a directory, but "foo-bar" also exists.
   * That means we have to invoke callbacks in the order:
   *
   *                foo
   *                foo-bar
   *                foo/xyzzy
   *
   * When we detect that "foo" is a directory, we can't
   * necessarily recurse immediately. Instead, we keep a queue
   * of deferred directories, recursing on them at the right time.
   *
   * The queue holds indexes into `files'; is_deferred_nested records,
   * for each queued directory, whether it is a nested tree.
   */

  rel_file = 0;
  deferred_recursions_head = 0;
  deferred_recursions_tail = 0;
  deferred_recursions = 0;
  is_deferred_nested = 0;

  ar_setsize ((void **)&deferred_recursions, 0, n_files, sizeof (int));
  ar_setsize ((void **)&is_deferred_nested, 0, n_files, sizeof (int));

  x = 0;
  while ((x < n_files) || (deferred_recursions_head != deferred_recursions_tail))
    {
      int is_deferred;
      int deferred_nested;
      char * file;
      struct stat stat_buf;
      int is_control = 0;

      /* take the oldest deferred directory if its contents sort ahead
       * of the next unprocessed entry (or if no entries are left);
       * otherwise take the next entry.
       */
      if ((deferred_recursions_head != deferred_recursions_tail)
          && ((x >= n_files)
              || right_order_for_recursion (files[deferred_recursions[deferred_recursions_head]], files[x])))
        {
          is_deferred = 1;
          file = files[deferred_recursions[deferred_recursions_head]];
          deferred_nested = is_deferred_nested[deferred_recursions_head];
          ++deferred_recursions_head;
        }
      else
        {
          is_deferred = 0;
          deferred_nested = 0;
          file = files[x];
          ++x;
        }

      rel_file = file_name_in_vicinity (0, root, file);

      /* a deferred directory was already classified and reported when
       * it was first seen; jump straight to the recursion.  stat_buf is
       * not filled in on this path and the code at those labels does
       * not read it.
       */
      if (is_deferred)
        {
          if (deferred_nested)
            goto handle_deferred_nested;
          else
            goto handle_deferred;
        }

      /* . and .. are mandatory exclude files
       */
      if (!str_cmp (".", file) || !str_cmp ("..", file))
        {
        next_file:
          lim_free (0, rel_file);
          rel_file = 0;
          continue;
        }

      safe_lstat (rel_file, &stat_buf);

      /* only symlinks, directories and plain files are
       * considered source, all others need to be precious
       */
      if (   !S_ISREG(stat_buf.st_mode)
          && !S_ISLNK(stat_buf.st_mode)
          && !S_ISDIR(stat_buf.st_mode))
        {
          goto precious_file;
        }

      /* file names that contains_illegal_character rejects are
       * mandatory unrecognized files
       */
      if (contains_illegal_character (file))
        {
        unrecognized_file:
          if (options->categories & arch_inventory_unrecognized)
            {
              callback (rel_file, &stat_buf, arch_inventory_unrecognized, 0, 0, closure, escape_classes);
            }
          goto next_file;
        }

      /* callers can specify a pattern for additional files to
       * exclude from consideration.
       */
      if (!options->include_excluded
          && ((dir_regexps && DIR_REGEXP(dir_regexps, excludes) && filename_matches (DIR_REGEXP(dir_regexps, excludes), file))
              || filename_matches (&options->regexps.excludes_pattern, file)))
        goto next_file;

      /* arch control files that get past the exclude pattern are
       * always source.  The result is remembered in is_control because
       * it is needed again when the file's category is decided.
       */
      is_control = arch_is_control_file (rel_file, file);
      if (is_control)
        {
          goto handle_source_file;
        }

      /* file names beginning with "++" are always precious.
       */
      if ((file[0] == '+') && (file[1] == '+'))
        {
          goto precious_file;
        }

      /* file names beginning with ",," are always considered junk files.
       */
      if (file[0] == ',' && file[1] == ',')
        goto junk_file;

      /* test against optional per-directory regexps first
       */
      if (dir_regexps)
        {
          regex_t * re;

          /* junk */
          re = DIR_REGEXP(dir_regexps, junk);
          if (re && filename_matches (re, file))
            goto junk_file;

          /* backup */
          re = DIR_REGEXP(dir_regexps, backup);
          if (re && filename_matches (re, file))
            goto backup_file;

          /* precious */
          re = DIR_REGEXP(dir_regexps, precious);
          if (re && filename_matches (re, file))
            goto precious_file;

          /* unrecognized */
          re = DIR_REGEXP(dir_regexps, unrecognized);
          if (re && filename_matches (re, file))
            goto unrecognized_file;

          /* source */
          re = DIR_REGEXP(dir_regexps, source);
          if (re && filename_matches (re, file))
            goto handle_source_file;
        }

      /* callers can specify a pattern for "junk" files -- files
       * presumed safe-to-be-removed by automatic tools, barring
       * concurrent tools.
       */
      if (filename_matches (&options->regexps.junk_pattern, file))
        {
        junk_file:
          if (options->categories & arch_inventory_junk)
            {
              callback (rel_file, &stat_buf, arch_inventory_junk, 0, 0, closure, escape_classes);
            }
          goto next_file;
        }

      /* callers can specify a pattern for "backup" files -- files
       * that are created by editors and similar programs to save old
       * versions
       */
      if (filename_matches (&options->regexps.backup_pattern, file))
        {
        backup_file:
          if (options->categories & arch_inventory_backup)
            {
              callback (rel_file, &stat_buf, arch_inventory_backup, 0, 0, closure, escape_classes);
            }
          goto next_file;
        }

      /* callers can specify a pattern for "precious" files -- files
       * that are not part of the source, but which should never be
       * automatically removed.
       */
      if (filename_matches (&options->regexps.precious_pattern, file))
        {
        precious_file:
          if (options->categories & arch_inventory_precious)
            {
              callback (rel_file, &stat_buf, arch_inventory_precious, 0, 0, closure, escape_classes);
            }
          goto next_file;
        }

      /* callers can specify a pattern for explicitly "unrecognized" files --
       * files that should be flagged as errors in tree-lint reports.
       */
      if (filename_matches (&options->regexps.unrecognized_pattern, file))
        {
          goto unrecognized_file;
        }

      /* finally, a pattern for "source" files -- files which are expected
       * to be source files.  Note that the option untagged_source_category
       * determines the final disposition of files which match the source
       * pattern, but have no evident id.
       *
       * If a directory appears to be a source directory, but contains a rules
       * directory of its own, then it is in fact the root of a nested tree -- not
       * a regular source file.
       */
      if (filename_matches (&options->regexps.source_pattern, file))
        {
        handle_source_file:
          if (S_ISDIR (stat_buf.st_mode) && is_nested_tree (rel_file))
            {
              if (options->categories & arch_inventory_tree)
                {
                  callback (rel_file, &stat_buf, arch_inventory_tree, 0, 1, closure, escape_classes);
                }

              if (options->nested)
                {
                  /* the next sibling sorts ahead of this tree's
                   * contents: queue the tree (it is entry x - 1) and
                   * come back to it later.
                   */
                  if ((x < n_files) && !right_order_for_recursion (file, files[x]))
                    {
                      deferred_recursions[deferred_recursions_tail] = x - 1;
                      is_deferred_nested[deferred_recursions_tail] = 1;
                      ++deferred_recursions_tail;
                      lim_free (0, rel_file);
                      rel_file = 0;
                      continue;
                    }

                handle_deferred_nested:
                  {
                    /* a nested tree is traversed with its own naming
                     * conventions, read from the nested tree itself.
                     */
                    struct arch_inventory_options nest_opts;

                    mem_set0 ((t_uchar *)&nest_opts, sizeof nest_opts);
                    copy_options_but_regexps (&nest_opts, options);
                    arch_get_inventory_naming_conventions (&nest_opts, rel_file);
                    arch_inventory_traversal_internal (&nest_opts, rel_file, callback, closure, id_tagging_shortcut, explicit_skips, escape_classes);
                    arch_free_inventory_naming_conventions (&nest_opts);
                  }
                }
              goto next_file;
            }
          else
            {
              t_uchar * id;
              enum arch_inventory_category this_files_category;

              /* Not a nested tree.   Matches the source pattern.
               */

              id = 0;


              /* Do we need to compute the inventory id of this file which has a source name?
               *
               * Certainly so if the caller wants ids.
               *
               * Otherwise we need the id only if we need it to verify that this
               * is, indeed, source:
               *
               * If untagged-source is source, then we don't need the id.
               * If untagged-source is something else, then we need to see if it has an id.
               */
              if (options->want_ids || (options->untagged_source_category != arch_inventory_source))
                {
                  /* if the caller wants tags, or if we can only be certain that this is
                   * source by seeing if it has an id, get the id.
                   */
                  int untagged_is_source = (options->untagged_source_category == arch_inventory_source);

                  id = arch_inventory_id (options->method, untagged_is_source, rel_file, id_tagging_shortcut, &stat_buf, explicit_skips);
                }


              /* What is the category of the file?
               *
               * If we've confirmed that it has an id, then it's certainly source.
               * Also if untagged-source is automatically source.
               *
               * If untagged-source is not source there are two cases:
               *
               * If it's a control file, then it is unrecognized, unconditionally.
               *
               * Otherwise, the untagged-source directive tells us what it is.
               */
              if (id || (options->untagged_source_category == arch_inventory_source))
                this_files_category = arch_inventory_source;
              else if (is_control)
                this_files_category = arch_inventory_unrecognized;
              else
                this_files_category = options->untagged_source_category;


              /* Some callbacks want to see "matches source name but unrecognized
               * for want of tag" as a source file.   They distinguish this from
               * "matches unrecognized name" or "doesn't match any pattern".
               */
              if (options->treat_unrecognized_source_as_source && (this_files_category == arch_inventory_unrecognized))
                {
                  this_files_category = arch_inventory_source;
                }

              if (this_files_category & options->categories)
                {
                  callback (rel_file, &stat_buf, this_files_category, id, 1, closure, escape_classes);
                }

              /* `id' is necessarily 0 here when the category is not
               * source, so nothing is lost by skipping the free below.
               */
              if (this_files_category != arch_inventory_source)
                goto next_file;

              lim_free (0, id);
              id = 0;

              /* recurse into directories, or plan to later.
               */
              if (S_ISDIR (stat_buf.st_mode))
                {
                  if ((x < n_files) && !right_order_for_recursion (file, files[x]))
                    {
                      deferred_recursions[deferred_recursions_tail] = x - 1;
                      is_deferred_nested[deferred_recursions_tail] = 0;
                      ++deferred_recursions_tail;
                      lim_free (0, rel_file);
                      rel_file = 0;
                      continue;
                    }

                handle_deferred:
                  arch_inventory_traversal_internal (options, rel_file, callback, closure, id_tagging_shortcut, explicit_skips, escape_classes);
                }
              goto next_file;
            }
        }
      else
        goto unrecognized_file;
    }

  for (x = 0; x < n_files; ++x)
    {
      lim_free (0, files[x]);
    }

  ar_free ((void **)&files, 0);
  lim_free (0, rel_file);
  ar_free ((void **)&deferred_recursions, 0);
  ar_free ((void **)&is_deferred_nested, 0);
  if (dir_regexps)
    {
      free_directory_regexps (dir_regexps);
      lim_free (0, dir_regexps);
    }
}




/* qsort comparison function for an array of file name strings: each
 * argument points at a `char *' element, and the two strings are
 * ordered with str_cmp.
 */
static int
cmp_files (const void * va, const void * vb)
{
  char * const * left = (char * const *)va;
  char * const * right = (char * const *)vb;

  return str_cmp (*left, *right);
}

/* Given directory name `a' and a sibling name `b' that sorts after it,
 * return non-zero if the contents of `a' (paths of the form "a/...")
 * should be listed before `b', and 0 if `b' has to be listed first.
 *
 * Bytes are compared as unsigned values so that the answer does not
 * depend on whether plain `char' is signed: with a signed `char' a byte
 * >= 0x80 would compare below '/'.
 *
 * NOTE(review): this assumes that the sort order produced by str_cmp
 * (via cmp_files) also treats bytes as unsigned -- confirm.
 */
static int
right_order_for_recursion (char * a, char * b)
{
  const unsigned char * pa = (const unsigned char *)a;
  const unsigned char * pb = (const unsigned char *)b;

  /* a and b are already in lexical order (a < b); skip their
   * common prefix.
   */
  while (*pa && (*pa == *pb))
    {
      ++pa;
      ++pb;
    }

  if (!*pa)
    {
      /* Does "A/" come before "B" in an alphabetical listing?
       */
      return (*pb > '/');
    }
  else if (!*pb)
    {
      /* Does "B/" come after "A" in an alphabetical listing?
       */
      return (*pa < '/');
    }
  else
    {
      /* the names differ before either one ends, so "A/..." sorts
       * wherever "A" does: ahead of "B".
       */
      invariant (*pa < *pb);
      return 1;
    }
}


/* Return 1 if any byte of `filename' satisfies char_is_non_ascii,
 * 0 otherwise.  `filename' must be a single path component: a '/'
 * in it violates an invariant.
 */
static int
contains_illegal_character (char * filename)
{
  int x = 0;

  while (filename[x])
    {
      invariant (filename[x] != '/');

      if (char_is_non_ascii (((t_uchar *)filename)[x]))
        return 1;

      ++x;
    }

  return 0;
}

/* Return 1 if `filename' matches the compiled `pattern', 0 if it does
 * not.  Any other result from regexec is fatal (panic).
 */
static int
filename_matches (regex_t * pattern, char * filename)
{
  int status = regexec (pattern, filename, 0, 0, 0);

  if (status == REG_NOERROR)
    return 1;

  if (status != REG_NOMATCH)
    {
      panic ("unexpected regexec error in arch_inventory_traversal");
      return -1;
    }

  return 0;
}

/* Return non-zero if the relative path `rel_file' names an arch control
 * file, i.e. if it matches the control-file pattern below.
 *
 * The pattern is compiled on the first call and kept in static storage
 * for later calls.  `filename' is not used.
 */
int
arch_is_control_file (char * rel_file, char * filename)
{
  static regex_t control_pattern = {0,};
  static int compiled = 0;

  if (compiled)
    return filename_matches (&control_pattern, rel_file);

  {
    int re_error = regcomp (&control_pattern,
                            "^((.*/)?(\\.arch-ids(/(=id|[^/]*\\.id))?|\\{arch\\}((/[a-zA-Z=][^/~]*)(/[0-9a-zA-Z=][^/~]*)*)?|\\{arch\\}/\\.arch-project-tree|\\.arch-inventory))$",
                            REG_EXTENDED);

    invariant (!re_error);
    compiled = 1;
  }

  return filename_matches (&control_pattern, rel_file);
}


/* Return 1 if `vu_lstat' succeeds on the "{arch}" entry in the
 * vicinity of PATH, 0 if it fails with ENOENT.
 *
 * Any other `vu_lstat' failure is reported on descriptor 2 and the
 * process exits with status 2.
 */
static int
is_nested_tree (char * path)
{
  struct stat stat_buf;
  int errn;
  int found = 0;
  t_uchar * ctl_file = file_name_in_vicinity (0, path, "{arch}");

  if (vu_lstat (&errn, ctl_file, &stat_buf) >= 0)
    {
      found = 1;
    }
  else if (errn == ENOENT)
    {
      found = 0;
    }
  else
    {
      safe_printfmt (2, "Error encountered checking whether \"%s\" is a nested project tree:\n"
                     "%s accessing \"%s\" (in vu_lstat)\n", path, errno_to_string(errn), ctl_file);
      exit (2);
    }

  lim_free (0, ctl_file);
  return found;
}

/* Return 1 if the LEN-byte LINE should be skipped: it is empty,
 * its first byte is '#', or its first byte satisfies
 * `char_is_space'.  Return 0 otherwise.
 */
static int
is_comment_line (t_uchar * line, long len)
{
  if (len == 0)
    return 1;

  if (line[0] == '#')
    return 1;

  return char_is_space (line[0]) ? 1 : 0;
}

/* If the LEN-byte LINE begins with keyword KW followed by a
 * whitespace byte, add the rest of the line to the regexp source
 * accumulated in *RE and return 1.  Otherwise return 0 and leave
 * *RE untouched.
 *
 * The text after the keyword is trimmed of leading and trailing
 * whitespace and wrapped in parentheses.  When *RE is 0 the
 * parenthesized group becomes the new value; otherwise "|" and the
 * group are appended to the existing string.
 */
static int
sets_re (char * kw, char ** re, t_uchar * line, long len)
{
  int kw_len;
  t_uchar * body;
  t_uchar * group;

  kw_len = str_length (kw);

  if (len < (kw_len + 1))
    return 0;

  if (str_cmp_prefix (kw, line))
    return 0;

  if (!char_is_space (line[kw_len]))
    return 0;

  /* Step over the keyword, then trim both ends of what is left.
   */
  line += kw_len;
  len -= kw_len;

  for (; len && char_is_space (line[0]); ++line, --len)
    ;

  while (len && char_is_space (line [len - 1]))
    --len;

  body = str_save_n (0, line, len);
  group = str_alloc_cat (0, "(", body);
  group = str_realloc_cat (0, group, ")");
  lim_free (0, body);

  if (!*re)
    {
      *re = group;
    }
  else
    {
      *re = str_realloc_cat (0, *re, "|");
      *re = str_realloc_cat (0, *re, group);
      lim_free (0, group);
    }

  return 1;
}

/* If the LEN-byte LINE begins with keyword KW followed by a
 * whitespace byte, store METHOD in *METHOD_VAR and
 * UNTAGGED_CATEGORY in *UNTAGGED_CATEGORY_VAR and return 1.
 * Otherwise return 0 and store nothing.
 */
static int
sets_id_tagging_method (char * kw,
                     enum arch_id_tagging_method * method_var,
                     enum arch_inventory_category * untagged_category_var,
                     enum arch_id_tagging_method method,
                     enum arch_inventory_category untagged_category,
                     t_uchar * line, long len)
{
  int kw_len;

  kw_len = str_length (kw);

  if ((len >= (kw_len + 1))
      && !str_cmp_prefix (kw, line)
      && char_is_space (line[kw_len]))
    {
      *method_var = method;
      *untagged_category_var = untagged_category;
      return 1;
    }

  return 0;
}

/* Handle an "untagged-source" line.
 *
 * Returns 0, storing nothing, unless the SAVED_LEN-byte SAVED_LINE
 * begins with "untagged-source" followed by a whitespace byte.
 *
 * Otherwise the alphabetic word after the keyword (blanks skipped
 * on either side) must be one of "source", "precious", "backup",
 * "junk" or "unrecognized", and nothing but an optional newline may
 * follow it.  The matching category is stored in
 * *UNTAGGED_CATEGORY_VAR and 1 is returned.  Anything else is a
 * syntax error: a message is written to descriptor 2 and the
 * process exits with status 2.
 */
static int
sets_untagged_source_disposition (enum arch_inventory_category * untagged_category_var,
                                  t_uchar * saved_line, long saved_len)
{
  static const struct
  {
    char * name;
    enum arch_inventory_category category;
  } dispositions[] =
    {
      { "source", arch_inventory_source },
      { "precious", arch_inventory_precious },
      { "backup", arch_inventory_backup },
      { "junk", arch_inventory_junk },
      { "unrecognized", arch_inventory_unrecognized },
    };
  t_uchar * kw = "untagged-source";
  t_uchar * cursor = saved_line;
  long remaining = saved_len;
  t_uchar * word_start;
  t_uchar * spec = 0;
  int kw_len;
  int matched = -1;
  int i;

  kw_len = str_length (kw);

  if (remaining < (kw_len + 1))
    return 0;

  if (str_cmp_prefix (kw, cursor) || !char_is_space (cursor[kw_len]))
    return 0;

  remaining -= kw_len;
  cursor += kw_len;

  for (; remaining && char_is_blank (cursor[0]); ++cursor, --remaining)
    ;

  word_start = cursor;

  for (; remaining && char_is_alpha (cursor[0]); ++cursor, --remaining)
    ;

  spec = str_save_n (0, word_start, cursor - word_start);

  for (; remaining && char_is_blank (cursor[0]); ++cursor, --remaining)
    ;

  /* Only look the word up when nothing but a newline follows it;
   * trailing garbage leaves `matched' negative.
   */
  if (!remaining || (cursor[0] == '\n'))
    {
      for (i = 0; i < (int)(sizeof dispositions / sizeof dispositions[0]); ++i)
        {
          if (!str_cmp (spec, dispositions[i].name))
            {
              matched = i;
              break;
            }
        }
    }

  if (matched < 0)
    {
      safe_printfmt (2, "arch: syntax error in =tagging-method:\n  %.*s\n", (int)saved_len, saved_line);
      exit (2);
    }

  *untagged_category_var = dispositions[matched].category;

  lim_free (0, spec);
  return 1;
}

static void
read_directory_regexps (struct directory_regexps * regexps, char * dir_name)
{
  char * excludes = 0;
  char * junk = 0;
  char * backup = 0;
  char * precious = 0;
  char * unrecognized = 0;
  char * source = 0;
  t_uchar * file_name = 0;
  int fd;

  file_name = file_name_in_vicinity (0, dir_name, ".arch-inventory");
  fd = safe_open (file_name, O_RDONLY, 0);
  while (1)
    {
      t_uchar * line;
      long len;

      safe_next_line (&line, &len, fd);
      if (!len)
        break;

      (void)(!is_comment_line (line, len)
             && !sets_re ("exclude", &excludes, line, len)
             && !sets_re ("junk", &junk, line, len)
             && !sets_re ("backup", &backup, line, len)
             && !sets_re ("precious", &precious, line, len)
             && !sets_re ("unrecognized", &unrecognized, line, len)
             && !sets_re ("source", &source, line, len));
    }
  safe_close (fd);

  mem_set0((t_uchar*)regexps->regexps, sizeof regexps->regexps);
  if (excludes)
    {
      if (regcomp (&regexps->storage.excludes_pattern, excludes, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `excludes' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, excludes) = &regexps->storage.excludes_pattern;
    }

  if (junk)
    {
      if (regcomp (&regexps->storage.junk_pattern, junk, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `junk' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, junk) = &regexps->storage.junk_pattern;
    }

  if (backup)
    {
      if (regcomp (&regexps->storage.backup_pattern, backup, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `backup' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, backup) = &regexps->storage.backup_pattern;
    }

  if (precious)
    {
      if (regcomp (&regexps->storage.precious_pattern, precious, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `precious' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, precious) = &regexps->storage.precious_pattern;
    }

  if (unrecognized)
    {
      if (regcomp (&regexps->storage.unrecognized_pattern, unrecognized, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `unrecognized' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, unrecognized) = &regexps->storage.unrecognized_pattern;
    }

  if (source)
    {
      if (regcomp (&regexps->storage.source_pattern, source, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `source' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, source) = &regexps->storage.source_pattern;
    }

  lim_free (0, file_name);
  lim_free (0, excludes);
  lim_free (0, junk);
  lim_free (0, backup);
  lim_free (0, precious);
  lim_free (0, unrecognized);
  lim_free (0, source);
}

/* Call `regfree' on every non-null entry of REGEXPS->regexps.
 *
 * Neither the slot array nor REGEXPS itself is released or
 * modified here.
 */
static void
free_directory_regexps (struct directory_regexps * regexps)
{
  regex_t ** slot = regexps->regexps;
  regex_t ** const end = slot + (sizeof regexps->regexps / sizeof *regexps->regexps);

  while (slot != end)
    {
      if (*slot)
        regfree (*slot);

      ++slot;
    }
}

/* Build and return a new table from TABLE.
 *
 * Each record whose second field (index 1, used here as the id)
 * makes `str_cmp_prefix ("A_", id)' return non-zero is copied into
 * the result, in the original order.  Presumably that selects ids
 * which do not start with "A_" -- confirm against `str_cmp_prefix'.
 * TABLE itself is not modified.  The result is 0 when no record
 * qualifies.
 */
rel_table
pick_non_control (rel_table table)
{
  rel_table answer = 0;
  int row;

  for (row = 0; row < rel_n_records (table); ++row)
    {
      t_uchar * id = table[row][1];

      if (!str_cmp_prefix ("A_", id))
        continue;

      rel_add_records (&answer, rel_copy_record (table[row]), 0);
    }

  return answer;
}




/* tag: Tom Lord Wed May 14 09:47:16 2003 (invent.c)
 */

Generated by  Doxygen 1.6.0   Back to index