#47 resolve-deps: Add a --json flag to get full output in JSON form
Closed 7 years ago by ncoghlan. Opened 7 years ago by otaylor.
modularity/ otaylor/fedmod json-output  into  master

file modified
+59 -39
@@ -1,3 +1,4 @@ 

+ import collections

  import configparser

  import itertools

  import logging
@@ -69,39 +70,45 @@ 

      assert len(solvables) == 1

      return solvables[0]

  

- def print_transaction(pool, transaction):

+ def _get_dependency_details(pool, transaction):

      candq = transaction.newpackages()

-     if log.getEffectiveLevel() <= logging.INFO:

-         tb = smartcols.Table()

-         tb.title = "DEPENDENCY INFORMATION"

-         cl = tb.new_column("INFO")

-         cl.tree = True

-         cl_match = tb.new_column("MATCH")

-         for p in candq:

-             ln = tb.new_line()

-             ln[cl] = str(p)

-             for dep in p.lookup_deparray(solv.SOLVABLE_REQUIRES):

-                 lns = tb.new_line(ln)

-                 lns[cl] = str(dep)

-                 matches = set(s for s in candq if s.matchesdep(solv.SOLVABLE_PROVIDES, dep))

-                 if not matches and str(dep).startswith("/"):

-                     # Append provides by files

-                     # TODO: use Dataiterator for getting filelist

-                     matches = set(s for s in pool.select(str(dep), solv.Selection.SELECTION_FILELIST).solvables() if s in candq)

-                 # It was possible to resolve set, so something is wrong here

-                 assert matches

-                 first = True

-                 for m in matches:

-                     if first:

-                         lnc = lns

-                     else:

-                         lnss = tb.new_line(lns)

-                         lnc = lnss

-                         first = False

-                     lnc[cl_match] = str(m)

-         log.info(tb)

- 

- def _solve(solver, pkgnames):

+     result = {}

+     for p in candq:

+         pkg_details = {}

+         for dep in p.lookup_deparray(solv.SOLVABLE_REQUIRES):

+             matches = set(s for s in candq if s.matchesdep(solv.SOLVABLE_PROVIDES, dep))

+             if not matches and str(dep).startswith("/"):

+                 # Append provides by files

+                 # TODO: use Dataiterator for getting filelist

+                 matches = set(s for s in pool.select(str(dep), solv.Selection.SELECTION_FILELIST).solvables() if s in candq)

+             # It was possible to resolve set, so something is wrong here

+             assert matches

+             # While multiple packages providing the same thing is certainly possible, it is rare, and

+             # the confusion from picking one at random is worth the the simplification.

+             pkg_details[str(dep)] = sorted(str(m) for m in matches)[0]

+         result[str(p)] = pkg_details

+ 

+     return result

+ 

+ def print_transaction(details):

+     tb = smartcols.Table()

+     tb.title = "DEPENDENCY INFORMATION"

+     cl = tb.new_column("INFO")

+     cl.tree = True

+     cl_match = tb.new_column("MATCH")

+     for p in sorted(details):

+         ln = tb.new_line()

+         ln[cl] = p

+         deps = details[p]

+         for dep in sorted(deps):

+             lns = tb.new_line(ln)

+             lns[cl] = dep

+             lns[cl_match] = deps[dep]

+     log.info(tb)

+ 

+ FullInfo = collections.namedtuple('FullInfo', ['name', 'rpm', 'srpm', 'requires'])

+ 

+ def _solve(solver, pkgnames, full_info=False):

      """Given a set of package names, returns a list of solvables to install"""

      pool = solver.pool

  
@@ -125,19 +132,31 @@ 

          for problem in problems:

              log.warn(problem)

  

-     print_transaction(pool, solver.transaction())

-     result = set()

+     if log.getEffectiveLevel() <= logging.INFO or full_info:

+         dep_details = _get_dependency_details(pool, solver.transaction())

+         if log.getEffectiveLevel() <= logging.INFO:

+             print_transaction(dep_details)

+ 

+     if full_info:

+         result = []

+     else:

+         result = set()

      for s in solver.transaction().newpackages():

          if s.name.startswith("fedora-release"):

              # Relying on the F27 metadata injects irrelevant fedora-release deps

              continue

          if s.arch in ("src", "nosrc"):

              continue

-         # Ensure the solvables don't outlive the solver that created them

-         result.add(s.name)

+         # Ensure the solvables don't outlive the solver that created them by

+         # extracting the information we want but not returning the solvable.

+         if full_info:

+             rpm = str(s)

+             result.append(FullInfo(s.name, rpm, s.lookup_sourcepkg()[:-4], dep_details[rpm]))

+         else:

+             result.add(s.name)

      return result

  

- def ensure_buildable(pool, pkgnames):

+ def ensure_buildable(pool, pkgnames, full_info=False):

      """Given a set of solvables, returns a set of source packages & build deps"""

      # The given package set may not be installable on its own

      # That's OK, since other modules will provide those packages
@@ -152,7 +171,8 @@ 

  

  _DEFAULT_HINTS = ("glibc-minimal-langpack",)

  

- def ensure_installable(pool, pkgnames, hints=_DEFAULT_HINTS, recommendations=False):

+ def ensure_installable(pool, pkgnames, hints=_DEFAULT_HINTS,

+                        recommendations=False, full_info=False):

      """Iterate over the resolved dependency set for the given packages

  

      *hints*:  Packages that have higher priority when more than one package
@@ -174,7 +194,7 @@ 

          # Ignore weak deps

          solver.set_flag(solv.Solver.SOLVER_FLAG_IGNORE_RECOMMENDED, 1)

  

-     return _solve(solver, pkgnames)

+     return _solve(solver, pkgnames, full_info=full_info)

  

  def print_reldeps(pool, pkg):

      sel = pool.select(pkg, solv.Selection.SELECTION_NAME | solv.Selection.SELECTION_DOTARCH)

file modified
+8 -1
@@ -88,6 +88,11 @@ 

              help="Module to be used as a dependency. Can be used multiple times.",

          )

          parser_resolve_deps.add_argument(

+             "--json",

+             action='store_true',

+             help="Output dependencies in JSON format with extra information.",

+         )

+         parser_resolve_deps.add_argument(

I just migrated the CLI option handling fully over to click, so this will need to move up to be a new @click.option entry on the def resolve_deps command.

The decorator call will need to be something like:

@click.option("--json", is_flag=True, default=False, help="Output dependencies in JSON format with extra information.")

and then add a new json parameter to the function.

              "pkgs",

              metavar='PKGS',

              nargs='+',
@@ -163,7 +168,9 @@ 

              )

          elif cli.args.cmd_name == 'resolve-deps':

              rq = ModuleRepoquery()

-             rq.list_pkg_deps(cli.args.pkgs, cli.args.module_dependency)

+             rq.list_pkg_deps(cli.args.pkgs,

+                              module_deps=cli.args.module_dependency,

+                              json_output=cli.args.json)

With the migrated CLI handling, this will be json_output=json in the resolve_deps function.

          elif cli.args.cmd_name == 'module-packages':

              rq = ModuleRepoquery()

              rq.list_rpms_in_module(cli.args.module, full_nevra=cli.args.full_nevra)

@@ -1,5 +1,6 @@ 

  from __future__ import absolute_import

  

+ import json

  import sys

  import modulemd

  import logging
@@ -36,7 +37,7 @@ 

                  else:

                      print(_name_only(name))

      

-     def list_pkg_deps(self, pkgs, module_deps):

+     def list_pkg_deps(self, pkgs, module_deps=None, json_output=False):

          _repodata._populate_module_reverse_lookup()

          pkgs_in_modules = set()

          if module_deps:
@@ -45,11 +46,24 @@ 

                  pkgs_in_modules |= set(map(lambda x: _name_only(x), rpm_names))

                  

          pool = _depchase.make_pool("x86_64")

-         run_deps = _depchase.ensure_installable(pool, pkgs)

-         rpm_names = run_deps - pkgs_in_modules

-         if rpm_names:

-             for name in rpm_names:

-                 print(name)

+         if json_output:

+             run_deps = _depchase.ensure_installable(pool, pkgs, full_info=True)

+             result = []

+             for info in run_deps:

+                 if info.name in  pkgs_in_modules:

+                     continue

+                 result.append({

+                     'rpm': info.rpm,

+                     'srpm': info.srpm,

+                     'requires': info.requires,

+                 })

+             json.dump(result, sys.stdout, indent=4, sort_keys=True)

+         else:

+             run_deps = _depchase.ensure_installable(pool, pkgs)

+             rpm_names = run_deps - pkgs_in_modules

+             if rpm_names:

+                 for name in rpm_names:

+                     print(name)

  

      def list_modularized_pkgs(self, duplicate_only=False, list_modules=False):

          _repodata._populate_module_reverse_lookup()

When --json is passed to fedmod resolve-deps, return output in JSON form that contains, for each package

  • The NVR of the package
  • The NVR of the corresponding source package
  • The package in the transaction that satisfied each requirement

This is intended to allow other tools to provide higher-level reporting or analysis while sharing the underlying data sources and dependency-reporting logic of fedmod.

In particular, I'm using this for tooling to create module contents based based on the contents of the "upstream" flatpak runtimes. Example report

I haven't updated tests or docs in this PR yet - wanted to get feedback before I went ahead and did that.

The original attempt at this produced output like:

{
    "requires": {
        "libc.so.6(GLIBC_2.14)(64bit)": [
            "glibc-2.26-15.fc27.x86_64",
         ], 
        "rtld(GNU_HASH)": [
             "glibc-2.26-15.fc27.x86_64"
         ]
    },
    "rpm": "libtasn1-4.12-3.fc27.x86_64",
    "srpm": "libtasn1-4.12-3.fc27.src"
},

Where every dependency is a single element list. The second patch in this PR reduces accuracy for simplicity and just picks one in the rare case when multiple packages in the transaction provide a dependency. I think this is an OK tradeoff, but the lists could be left if desired.

Thanks for working on this!

After some of our earlier experiences with the "just pick one" shortcut for choosing which module to rely on for a particular RPM, I think we're going to be better off keeping the lists in the fedmod layer's output.

My rationale for that is that it's relatively easy for a wrapper layer to turn single-item lists into plain strings, but if we actually throw any raw data away, then there's no way for a wrapper layer to recover it.

The core approach you've taken in the PR looks fine to me, so my main comment is just what you already mentioned: it needs doc and test updates to match the revised implementation.

Thinking further about the list-or-not question, given that JSON is a typed format, we could potentially collapse single-element lists as a string, while leaving multi-element lists as lists. That would avoid ever losing data, while still simplifying the output for the typical case (i.e. no ambiguous dependencies).

I don't like the idea of flexible typing. Since the list of what package satisfies a requirement is 99.9% of the time a single element, having it only be a list when it's a multiple element is an invitation for people to write code that falls over in the 0.1% case.

To make the question a bit less nebulous: the only example I could find/think up: fedmod resolve-deps --json fontconfig dejavu-sans-mono-fonts liberation-sans-fonts --module=platform. fontconfig has an 'font(:lang=en)' requirement that is satisfied by both dejavu-sans-mono-fonts and liberation-sans-fonts.

So you end up with:

   "requires": {
        "/sbin/ldconfig": [
            "glibc-2.26-15.fc27.x86_64"
        ],
        "font(:lang=en)": [
            "dejavu-sans-mono-fonts-2.35-5.fc27.noarch",
            "liberation-sans-fonts-1:1.07.4-9.fc27.noarch"
        ],
        "fontpackages-filesystem": [
            "fontpackages-filesystem-1.44-19.fc27.noarch"
        ],
        "libc.so.6(GLIBC_2.14)(64bit)": [
            "glibc-2.26-15.fc27.x86_64"
        ],
        "libexpat.so.1()(64bit)": [
            "expat-2.2.4-1.fc27.x86_64"
        ],
        "libfreetype.so.6()(64bit)": [
            "freetype-2.8-6.fc27.x86_64"
        ],
        "libpthread.so.0()(64bit)": [
            "glibc-2.26-15.fc27.x86_64"
        ],
        "libpthread.so.0(GLIBC_2.2.5)(64bit)": [
            "glibc-2.26-15.fc27.x86_64"
        ],
        "rtld(GNU_HASH)": [
            "glibc-2.26-15.fc27.x86_64"
        ]
    },

My use case is "what pulled the package into the transaction" and here were both specified directly - that's why there is the duplication. But maybe the extra information is useful to someone... I was just trying to keep things a bit simpler.

Anyways, I don't mind leaving it in list form - but I don't want to make it an either/or because that seems non-robust to me.

OK, let's go with the list form then - it's easy enough to flatten for display purposes, and it makes it more likely consumers will be written to handle the "multiple providers" case.

I just migrated the CLI option handling fully over to click, so this will need to move up to be a new @click.option entry on the def resolve_deps command.

The decorator call will need to be something like:

@click.option("--json", is_flag=True, default=False, help="Output dependencies in JSON format with extra information.")

and then add a new json parameter to the function.

With the migrated CLI handling, this will be json_output=json in the resolve_deps function.

@otaylor Would you mind if I took this PR over, updated it for the revised CLI infrastructure, and switched it to always reporting lists for the "usually-only-one-but-sometimes-more-than-one" fields?

I've created an updated PR at https://pagure.io/modularity/fedmod/pull-request/63 with the above changes.

I'll add tests & docs and merge it tomorrow.

Pull-Request has been closed by ncoghlan

7 years ago