#699 [POC] *pkg import: Undo rpmautospec processing
Merged 8 months ago by onosek. Opened 8 months ago by churchyard.
churchyard/rpkg unprocess  into  master

file modified
+12 -12
@@ -53,7 +53,7 @@ 

  from pyrpkg.spec import SpecFile

  from pyrpkg.utils import (cached_property, extract_srpm, find_me,

                            is_file_tracked, is_lookaside_eligible_file,

-                           log_result, spec_file_processed_by_rpmautospec)

+                           log_result, spec_file_undo_rpmautospec)

  

  from .gitignore import GitIgnore

  
@@ -1473,16 +1473,6 @@ 

                      uploadfiles.append(file)

                  else:

                      files.append(file)

- 

-                 # Check all specfiles in SRPM. At this point (the 'import' command can run under

-                 # the dist-git repo without any specfiles - right after initialization) we are

-                 # not able determine which the main specfile is.

-                 if file.endswith('.spec') and not file.startswith('.') \

-                         and not self.allow_pre_generated_srpm \

-                         and spec_file_processed_by_rpmautospec(file, target_dir):

-                     raise rpkgError('SRPM was processed by rpmautospec '

-                                     '(specfile "{}" was analyzed)'.format(file))

- 

          finally:

              shutil.rmtree(target_dir)

  
@@ -2040,7 +2030,7 @@ 

          # not be removed by import command.

          reserved_ourfiles = [

              'README.md', 'gating.yaml', 'tests/*', '*.rpmlintrc',

-             '.fmf/*', '*.fmf']

+             '.fmf/*', '*.fmf', 'changelog']

  

          # Get a list of files we're currently tracking

          ourfiles = self.repo.git.ls_files().split('\n')
@@ -2081,6 +2071,16 @@ 

              os.chdir(oldpath)

              raise rpkgError("Got an error from rpm2cpio: %s" % err)

  

+         # Undo rpmautospec from all the spec files.

+         # At this point (the 'import' command can run under the dist-git repo

+         # without any specfiles - right after initialization) we are

+         # not able determine which the main specfile is.

+         if not self.allow_pre_generated_srpm:

+             for file in files:

+                 if file.endswith('.spec') and not file.startswith('.'):

+                     if spec_file_undo_rpmautospec(file):

+                         self.log.debug("rpmautospec processing removed from {0}".format(file))

+ 

          # And finally add all the files we know about (and our stock files)

          for file in ('.gitignore', 'sources'):

              if not os.path.exists(file):

file modified
+67 -10
@@ -336,22 +336,79 @@ 

      return encoding == "binary"

  

  

- def spec_file_processed_by_rpmautospec(file_name, dir_path=None):

+ def _replace_lines(lines, startline, endline, replacement_lines=None, strip_endline=False):

+     replacement_lines = replacement_lines or []

+     try:

+         start = lines.index(startline)

+         end = lines.index(endline, start)

+     except ValueError:

+         # if both are missing, nothing to do, all good

+         # if only one of them is present, we better not touch it

+         return lines, False

+     else:

+         # rpmautospec adds an empty line after the end

+         # we want to remove it, but only if it is actually empty

+         if strip_endline and lines[end+1] == "\n":

+             end += 1

+         lines = lines[:start] + replacement_lines + lines[end+1:]

+         return lines, True

+ 

+ 

+ def spec_file_undo_rpmautospec(file_name, dir_path=None):

+     """

+     Given a path to specfile, undo changes generated by rpmautospec.

+     Iff there is something to undo, the specfile will be overwritten.

+ 

+     Namely:

+ 

+      1. Removes everything between the following lines:

+           ## START: Set by rpmautospec

+           ## END: Set by rpmautospec

+      2. Replaces everything between the following lines with %autochangelog:

+           ## START: Generated by rpmautospec

+           ## END: Generated by rpmautospec

+ 

+     Both of the steps only happen once. If the specfile contains multiple such sections,

+     only the first one is removed/replaced.

+ 

+     The saved spec file is not guaranteed to be bit-by-bit identical with the original

+     spec file used as an input to rpmautospec.

+     However, subsequent repeated conversions there and back should be quite stable.

+ 

+     The return value says whether the specfile was overwritten.

+     """

      file_path = os.path.join(dir_path or "", file_name)

  

      try:

-         contents = open(file_path).readlines()

+         with open(file_path) as f:

+             contents = f.readlines()

      except Exception:

-         # if we can't read it, let's assume the answer is "no".

+         # if we can't read it, let's do nothing

          return False

  

-     # Check for the %autorelease header prepended to the file

-     if any('START: Set by rpmautospec' in line for line in contents[:10]):

+     # remove the generated macro section near the beginning of the specfile

+     contents, was_removed = _replace_lines(

+         contents,

+         '## START: Set by rpmautospec\n',

+         '## END: Set by rpmautospec\n',

+         strip_endline=True)

+ 

+     # replace the generated changelog with %autochangelog

+     # note that this does not generally produce content identical to the original

+     # e.g. the macro could have been conditionalized or in curly brackets

+     # most importantly, the %changelog section might have been omitted entirely

+     # however, this should be Good Enough for most of us

+     contents, was_replaced = _replace_lines(

+         contents,

+         '## START: Generated by rpmautospec\n',

+         '## END: Generated by rpmautospec\n',

+         ['%autochangelog\n'])

+ 

+     # finally, replace the spec if needed

+     # if we cannot write it, better blow up

+     if was_removed or was_replaced:

+         with open(file_path, 'w') as f:

+             f.writelines(contents)

          return True

  

-     # It seems that currently there's no mechanism to detect

-     # %autochangelog processing. But most packages would use both

-     # %autochangelog and %autorelease together, so we should catch

-     # most cases by checking for %autorelease only.

-     # https://pagure.io/fedora-infra/rpmautospec/issue/269

      return False

Fixes https://pagure.io/fedpkg/issue/527

Depends-on: https://pagure.io/fedora-infra/rpmautospec/pull-request/312

This is a proof of concept, I haven't touched the tests at all. Will do that if this has a chance of being accepted.

For a long time already, I have had this idea that rpmautospec's problem with rpkg import could be solved as follows:

  1. In rpmautospec itself, implement command rpmautospec revert-process-distgit that does just that.
  2. In rpkg, do a minimal check to detect specfiles that are output from rpmautospec process-distgit. and revert them with the new command before importing.

This solution would have the good property that the code that needs detailed understanding of rpmautospec's working stays inside that project.
Also, the new command could be useful in other situations as well.

That said, having a working solution using a different approach now would be great, since

  1. I have unfortunately not been able to implement that idea yet, and many months have passed.
  2. It is not easy to get any change into rpmautospec, as opposed to rpkg where patches are always very welcome.
  3. rpkg can still switch to the new rpmautospec command later, should it ever materialize.

You pretty much summarized my thought process here as well.

Is this acceptable by the rpkg maintainers? If so, I can have a look at the tests.

The code looks good.
If this functionality will be covered in rpmautospec in the future, we can modify rpkg again.
If you want, I can take care of unittests for you.

If you want, I can take care of unittests for you.

That would be awesome, thanks!

What's the point of this range? strip_endlines is either 0 or 1. The loop will run at most once. Isn't the code equivalent to this?

if strip_endlines and lines[end+1] == "\n":
    end += 1

How many endlines should it strip?

The point of strip_endlines being a number is to allow easy adjustments of the code in case we need to strip more or less number of lines. Currently, the code is only called with 1 or 0, so it will indeed run at most once.

In the spirit of YAGNI, the ability to strip an arbitrary number of empty lines can be removed and the parameter can be changed to strip_endline=True|False. Let me know if you want me to do that.

It's indeed a trifle. If I can choose, I prefer a simpler version True|False. We can easily change it when needed.

Will do. I'm on PTO today, so perhaps tomorrow.

Thanks. Here is a specfile for testing:

## START: Set by rpmautospec
## (rpmautospec version 0.3.5)
## RPMAUTOSPEC: autorelease, autochangelog
%define autorelease(e:s:pb:n) %{?-p:0.}%{lua:
    release_number = 8;
    base_release_number = tonumber(rpm.expand("%{?-b*}%{!?-b:1}"));
    print(release_number + base_release_number - 1);
}%{?-e:.%{-e*}}%{?-s:.%{-s*}}%{!?-n:%{?dist}}
## END: Set by rpmautospec

# autogenerated specfile
Summary: Dummy summary
Name: docpkg
Version: 0.2
Release: %autorelease
License: GPL
Group: Applications/Productivity

BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
Source0: hello-world.txt
Source1: docpkg.tar.gz
Source2: source-without-extension
# added empty dir just to test import srpm `fedpkg import`. It should skip the dir.
Source3: EMPTY_DIR

%description
This is a dummy description.

%prep
cp %{SOURCE0} .

%build

%clean
rm -rf $$RPM_BUILD_ROOT
%install
rm -rf $RPM_BUILD_ROOT
mkdir $RPM_BUILD_ROOT
mkdir -p $RPM_BUILD_ROOT/usr/share/doc
cp %{SOURCE0} $RPM_BUILD_ROOT/usr/share/doc/hello-world.txt

%files
%doc "/usr/share/doc/hello-world.txt"

%changelog
## START: Generated by rpmautospec
* Sun Jan  1 2006 tester <tester@example.com> - 0.2-1
- New release 0.2-1

* Sun Jan  1 2006 John Doe <jdoe@example.com> - 0.2-1
- Initial version

## END: Generated by rpmautospec

2 new commits added

  • *pkg import: Undo rpmautospec processing
  • `*pkg import: Don't delete changelog generated byrpmautospec convert```
8 months ago

Anyway, I took the changes wrt strip_endline from your branch, removed an unrelated style difference in a different function, tweaked things a little, and amended it here.

Commit 2bd726d fixes this pull-request

Pull-Request has been merged by onosek

8 months ago

I have realized, that line Release: %autorelease will stay unchanged in the specfile after processing. Is it intentional?
When this could be released? Should we wait on https://pagure.io/fedora-infra/rpmautospec/pull-request/312 ? If yes, how long (new boundaries would appear in specfiles after their next update)?

I have realized, that line Release: %autorelease will stay unchanged in the specfile after processing. Is it intentional?

Yes. The idea is to get the spec file as it existed before the rpmautospec processing. We want to import it in distgit with the Release: %autorelease line.

When this could be released? Should we wait on https://pagure.io/fedora-infra/rpmautospec/pull-request/312 ?

Yes.

If yes, how long (new boundaries would appear in specfiles after their next update)?

The boundaries will appear in specfiles in SRPMs generated by the new rpmautospec. Ideally, this should require a new enough rpmautospec on RPM level (e.g. via Requires: (python3-rpmautospec >= 0.3.6 if python3-rpmautospec) or Conflicts: python3-rpmautospec < 0.3.6).

Metadata