Einen Moorhuhn-Shooter programmieren (image template matching)

Hi, ich habe mir eine kleine Herausforderung vorgenommen und möchte in Java einen Moorhuhn-Shooter programmieren. Das Problem ist aber, dass ich ohne effiziente Verfahren bzw. Techniken dabei nicht weiterkomme. Die Bildverarbeitung dauert mehrere Sekunden, dann ist das Moorhuhn natürlich schon weg.

Zunächst einmal der Code:

package org.example;

import javax.imageio.ImageIO;
import javax.swing.*;
import java.awt.*;
import java.awt.event.InputEvent;
import java.awt.image.BufferedImage;
import java.awt.image.FilteredImageSource;
import java.awt.image.ImageFilter;
import java.awt.image.ImageProducer;
import java.io.IOException;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.Objects;

/**
 * Screen bot that searches a screenshot for a Moorhuhn template and clicks the best match.
 *
 * <p>The template {@code /Unbenannt.png} is loaded from the classpath, converted with
 * {@link #biToGrayscale(BufferedImage)} and scaled to a series of square sizes. Every round the
 * whole screen is captured, converted the same way and scanned on a coarse grid with every
 * template size; if the best similarity reaches {@link #MATCH_THRESHOLD} the mouse is moved to
 * the center of the match and the left button is clicked.
 */
public class Main {
    /** Number of template sizes kept in {@link #moors}. */
    private static final int TEMPLATE_COUNT = 9;
    /** Size series: level {@code i >= 1} is a square of {@code BASE_SIZE - i * SIZE_STEP} pixels. */
    private static final int BASE_SIZE = 100;
    private static final int SIZE_STEP = 10;
    /** Distance in pixels between two scanned positions, horizontally and vertically. */
    private static final int SCAN_STEP = 20;
    /** Minimum similarity (0..1) that triggers a click. */
    private static final double MATCH_THRESHOLD = 0.975;
    /** Pause between two rounds. */
    private static final long ROUND_PAUSE_MILLIS = 1000;

    /** Template at index 0 in its loaded size, followed by the scaled-down variants. */
    private final BufferedImage[] moors;
    /** Created once and reused for every capture and click. */
    private final Robot robot;

    /**
     * Loads the templates, shows them in a preview window and then runs matching rounds forever.
     * This constructor never returns normally; the process ends when the preview window is closed.
     *
     * @throws IOException if the template resource cannot be decoded
     * @throws InterruptedException if the thread is interrupted while sleeping
     * @throws AWTException if the {@link Robot} cannot be created
     * @throws NullPointerException if the template resource is missing
     */
    public Main() throws IOException, InterruptedException, AWTException {
        moors = buildTemplates(biToGrayscale(loadTemplate()));
        robot = new Robot();

        JFrame jFrame = new JFrame();
        jFrame.setLayout(new GridLayout(3, 3));
        for (BufferedImage b : moors) {
            jFrame.add(new JLabel(new ImageIcon(b)));
        }
        jFrame.pack();
        jFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        jFrame.setVisible(true);

        // Show the templates for a moment, then shrink the window to a 50 px high strip.
        Thread.sleep(2500);
        jFrame.setSize(jFrame.getWidth(), 50);

        while (true) {
            round();
            Thread.sleep(ROUND_PAUSE_MILLIS);
        }
    }

    /** Reads {@code /Unbenannt.png} from the classpath and closes the stream afterwards. */
    private static BufferedImage loadTemplate() throws IOException {
        try (var in = Objects.requireNonNull(
                Main.class.getResourceAsStream("/Unbenannt.png"),
                "resource /Unbenannt.png not found")) {
            BufferedImage image = ImageIO.read(in);
            if (image == null) {
                // ImageIO.read returns null when no registered reader understands the data.
                throw new IOException("/Unbenannt.png is not a readable image");
            }
            return image;
        }
    }

    /**
     * Builds the template series: index 0 is {@code base} itself, every further level is
     * {@code base} scaled to a square of {@code BASE_SIZE - i * SIZE_STEP} pixels.
     *
     * <p>Each level is scaled directly from {@code base}; scaling from the previous level would
     * interpolate an already interpolated image and blur the small templates more and more.
     *
     * @param base the full-size template
     * @return an array of {@link #TEMPLATE_COUNT} images
     */
    static BufferedImage[] buildTemplates(BufferedImage base) {
        BufferedImage[] templates = new BufferedImage[TEMPLATE_COUNT];
        templates[0] = base;
        for (int i = 1; i < templates.length; i++) {
            int edge = BASE_SIZE - i * SIZE_STEP;
            templates[i] = scale(base, edge, edge);
        }
        return templates;
    }

    /**
     * Returns a copy of the image filtered through {@code new GrayFilter(true, 50)}
     * (brightening enabled, gray percentage 50), drawn into a new {@code TYPE_INT_ARGB} image.
     *
     * @param colorImage the image to convert; it is not modified
     * @return a new image of the same size
     */
    public BufferedImage biToGrayscale(BufferedImage colorImage) {
        ImageFilter filter = new GrayFilter(true, 50);
        ImageProducer producer = new FilteredImageSource(colorImage.getSource(), filter);
        Image image = Toolkit.getDefaultToolkit().createImage(producer);
        BufferedImage newImage = new BufferedImage(image.getWidth(null), image.getHeight(null), BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = newImage.createGraphics();
        g.drawImage(image, 0, 0, null);
        g.dispose();
        return newImage;
    }

    /**
     * Scales an image to the given size using bilinear interpolation.
     *
     * @param original the source image; it is not modified
     * @param newWidth target width in pixels
     * @param newHeight target height in pixels
     * @return a new image of {@code newWidth x newHeight} pixels
     */
    public BufferedImage rescaleBi(BufferedImage original, int newWidth, int newHeight) {
        return scale(original, newWidth, newHeight);
    }

    /** Static implementation behind {@link #rescaleBi(BufferedImage, int, int)}. */
    private static BufferedImage scale(BufferedImage original, int newWidth, int newHeight) {
        // TYPE_CUSTOM is not accepted by the (width, height, type) constructor; use ARGB instead.
        int type = original.getType() == BufferedImage.TYPE_CUSTOM
                ? BufferedImage.TYPE_INT_ARGB
                : original.getType();
        BufferedImage resized = new BufferedImage(newWidth, newHeight, type);
        Graphics2D g = resized.createGraphics();
        g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
        g.drawImage(original, 0, 0, newWidth, newHeight, 0, 0, original.getWidth(), original.getHeight(), null);
        g.dispose();
        return resized;
    }

    /**
     * Compares {@code small} with the equally sized region of {@code big} whose top-left corner is
     * {@code (a, b)}.
     *
     * <p>Only the lowest 8 bits of each pixel (the blue channel) are compared, so both images are
     * expected to be grayscale, as produced by {@link #biToGrayscale(BufferedImage)}.
     *
     * @param small the template
     * @param big the image that is searched
     * @param a x coordinate of the region in {@code big}
     * @param b y coordinate of the region in {@code big}
     * @return {@code 1 - (mean absolute difference / 255)}; 1.0 for identical pixels, 0.0 for the
     *     largest possible difference
     * @throws ArrayIndexOutOfBoundsException if the region does not lie completely inside {@code big}
     */
    public double similar(BufferedImage small, BufferedImage big, int a, int b) {
        int width = small.getWidth();
        int height = small.getHeight();
        int[] smallPixels = small.getRGB(0, 0, width, height, null, 0, width);
        int[] region = big.getRGB(a, b, width, height, null, 0, width);
        return similarity(smallPixels, width, height, region, width, 0, 0);
    }

    /**
     * Similarity on raw pixel arrays (row-major, one int per pixel).
     *
     * <p>The differences are accumulated as an integer and divided once at the end, which avoids a
     * floating-point division and rounding error per pixel.
     */
    private static double similarity(int[] small, int width, int height,
                                     int[] big, int bigStride, int a, int b) {
        long total = 0;
        for (int y = 0; y < height; y++) {
            int smallRow = y * width;
            int bigRow = (b + y) * bigStride + a;
            for (int x = 0; x < width; x++) {
                int c = small[smallRow + x] & 0xFF;
                int d = big[bigRow + x] & 0xFF;
                total += 255 - Math.abs(c - d);
            }
        }
        return total / (255.0 * width * height);
    }

    /**
     * One scanned position.
     *
     * @param index running number of the position within one scan
     * @param i index of the template in the template array
     * @param x x coordinate of the top-left corner in the searched image
     * @param y y coordinate of the top-left corner in the searched image
     * @param sum similarity at that position (0..1)
     */
    record MR(long index, int i, int x, int y, double sum) {
    }

    /**
     * Scans {@code screen} on a grid of {@code step} pixels with every template and returns the
     * position with the highest similarity. On ties the position scanned first wins.
     *
     * <p>The pixels of the screen and of each template are fetched once with a bulk
     * {@code getRGB} call instead of one call per pixel and position, and only the best
     * candidate is kept instead of collecting and sorting all of them.
     *
     * @param templates the templates to try, in order
     * @param screen the image that is searched
     * @param step grid distance in pixels, must be positive
     * @return the best position, or {@code null} if no template fits into {@code screen}
     * @throws IllegalArgumentException if {@code step} is not positive
     */
    static MR findBestMatch(BufferedImage[] templates, BufferedImage screen, int step) {
        if (step <= 0) {
            throw new IllegalArgumentException("step must be positive: " + step);
        }
        int screenWidth = screen.getWidth();
        int screenHeight = screen.getHeight();
        int[] screenPixels = screen.getRGB(0, 0, screenWidth, screenHeight, null, 0, screenWidth);

        MR best = null;
        long index = 0;
        for (int i = 0; i < templates.length; i++) {
            BufferedImage template = templates[i];
            int width = template.getWidth();
            int height = template.getHeight();
            int[] templatePixels = template.getRGB(0, 0, width, height, null, 0, width);
            for (int y = 0; y + height <= screenHeight; y += step) {
                for (int x = 0; x + width <= screenWidth; x += step) {
                    double sum = similarity(templatePixels, width, height, screenPixels, screenWidth, x, y);
                    long current = index++;
                    if (best == null || sum > best.sum()) {
                        best = new MR(current, i, x, y, sum);
                    }
                }
            }
        }
        return best;
    }

    /**
     * Captures the whole screen, searches it for the templates and clicks the center of the best
     * match if its similarity is at least {@link #MATCH_THRESHOLD}.
     *
     * @throws AWTException kept in the signature for existing callers
     */
    public void round() throws AWTException {
        System.out.println("round = " + System.currentTimeMillis());
        Rectangle rect = new Rectangle(Toolkit.getDefaultToolkit().getScreenSize());
        BufferedImage screenCapture = biToGrayscale(robot.createScreenCapture(rect));

        MR first = findBestMatch(moors, screenCapture, SCAN_STEP);
        if (first == null || first.sum() < MATCH_THRESHOLD) {
            return;
        }
        Point point = middle(first);
        System.out.println("first = " + first);
        System.out.println("point = " + point);
        robot.mouseMove(point.x, point.y);
        robot.mousePress(InputEvent.BUTTON1_DOWN_MASK);
        robot.mouseRelease(InputEvent.BUTTON1_DOWN_MASK);
    }

    /**
     * Returns the center of the matched template in screen coordinates.
     *
     * @param m a match whose {@code i} refers to a template in {@link #moors}
     * @return top-left corner of the match plus half the template size
     */
    public Point middle(MR m) {
        BufferedImage template = moors[m.i()];
        return new Point(m.x() + template.getWidth() / 2, m.y() + template.getHeight() / 2);
    }

    public static void main(String[] args) throws IOException, InterruptedException, AWTException {
        new Main();
    }
}

Könnt ihr mir sagen, was ich verbessern kann? Ich habe schon versucht, nur 600x600 Pixel vom Bildschirm aufzunehmen und die Abtastschrittweite auf 20 px zu erhöhen, aber das ändert nur minimal etwas…

Hier ist noch /Unbenannt.png:

Unbenannt

Danke für eure Hilfe

Ich habe es jetzt hinbekommen. Es waren mehrere Sachen suboptimal:

  • Der aufgenommene Bildschirmausschnitt (Screen-Capture) sollte zugeschnitten und zudem verkleinert werden,
  • das BufferedImage muss nicht in Graustufen umgewandelt werden,
  • (man kann die Farbdifferenz auch ohne Graustufen berechnen,)
  • es ist nicht sinnvoll, die Subimages aller Größen durchzugehen,
  • (man braucht nur eins, das ungefähr die gleiche Größe hat,)
  • und der Hintergrund des Moorhuhn-Kopfs ist suboptimal gewählt,
  • (eigentlich braucht man gar keinen Hintergrund.)

Hier eine Anpassung, die also funktioniert:

package org.example;

import javax.imageio.ImageIO;
import javax.swing.*;
import java.awt.*;
import java.awt.event.InputEvent;
import java.awt.image.BufferedImage;
import java.awt.image.FilteredImageSource;
import java.awt.image.ImageFilter;
import java.awt.image.ImageProducer;
import java.io.IOException;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.Objects;

/**
 * Screen bot that searches a downscaled screenshot for a Moorhuhn template and clicks the best
 * match.
 *
 * <p>The template {@code /Unbenannt.png} is loaded from the classpath and scaled to a series of
 * square sizes (all of them are shown in the preview window, only the smallest one is used for
 * matching). Every round the top-left {@link #CAPTURE_SIZE} x {@link #CAPTURE_SIZE} pixels of the
 * screen are captured, scaled down to {@link #SCALED_SIZE} x {@link #SCALED_SIZE} and scanned on
 * a grid; colors are compared directly, without a grayscale conversion.
 */
public class Main {
    /** Number of template sizes kept in {@link #moors}. */
    private static final int TEMPLATE_COUNT = 9;
    /** Size series: level {@code i >= 1} is a square of {@code BASE_SIZE - i * SIZE_STEP} pixels. */
    private static final int BASE_SIZE = 100;
    private static final int SIZE_STEP = 10;
    /** Edge length of the captured screen region, anchored at the screen origin. */
    private static final int CAPTURE_SIZE = 800;
    /** Edge length the capture is scaled down to before matching. */
    private static final int SCALED_SIZE = 200;
    /** Factor between coordinates in the scaled capture and screen coordinates (800 / 200 = 4). */
    private static final int DOWNSCALE = CAPTURE_SIZE / SCALED_SIZE;
    /**
     * Offset in screen pixels added to the scaled match corner to get the click position.
     * NOTE(review): the geometric center of the 20 px template would be 40 screen pixels; 50 is
     * kept unchanged from the tuned original - confirm whether that is intended.
     */
    private static final int CLICK_OFFSET = 50;
    /** Distance in pixels between two scanned positions in the scaled capture. */
    private static final int SCAN_STEP = 5;
    /** Minimum similarity (0..1) that triggers a click. */
    private static final double MATCH_THRESHOLD = 0.95;
    /** Pause between two rounds. */
    private static final long ROUND_PAUSE_MILLIS = 1500;
    /** Largest possible squared RGB distance of two pixels: 3 * 255 * 255. */
    private static final double MAX_SQUARED_DISTANCE = 195075.0;

    /** Template at index 0 in its loaded size, followed by the scaled-down variants. */
    private final BufferedImage[] moors;
    /** Created once and reused for every capture and click. */
    private final Robot robot;

    /**
     * Loads the templates, shows them in a preview window and then runs matching rounds forever.
     * This constructor never returns normally; the process ends when the preview window is closed.
     *
     * @throws IOException if the template resource cannot be decoded
     * @throws InterruptedException if the thread is interrupted while sleeping
     * @throws AWTException if the {@link Robot} cannot be created
     * @throws NullPointerException if the template resource is missing
     */
    public Main() throws IOException, InterruptedException, AWTException {
        moors = buildTemplates(loadTemplate());
        robot = new Robot();

        JFrame jFrame = new JFrame();
        jFrame.setLayout(new GridLayout(3, 3));
        for (BufferedImage b : moors) {
            jFrame.add(new JLabel(new ImageIcon(b)));
        }
        jFrame.pack();
        jFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        jFrame.setVisible(true);

        // Show the templates for a moment, then shrink the window to a 50 px high strip.
        Thread.sleep(2500);
        jFrame.setSize(jFrame.getWidth(), 50);

        while (true) {
            round();
            Thread.sleep(ROUND_PAUSE_MILLIS);
        }
    }

    /** Reads {@code /Unbenannt.png} from the classpath and closes the stream afterwards. */
    private static BufferedImage loadTemplate() throws IOException {
        try (var in = Objects.requireNonNull(
                Main.class.getResourceAsStream("/Unbenannt.png"),
                "resource /Unbenannt.png not found")) {
            BufferedImage image = ImageIO.read(in);
            if (image == null) {
                // ImageIO.read returns null when no registered reader understands the data.
                throw new IOException("/Unbenannt.png is not a readable image");
            }
            return image;
        }
    }

    /**
     * Builds the template series: index 0 is {@code base} itself, every further level is
     * {@code base} scaled to a square of {@code BASE_SIZE - i * SIZE_STEP} pixels.
     *
     * @param base the full-size template
     * @return an array of {@link #TEMPLATE_COUNT} images
     */
    static BufferedImage[] buildTemplates(BufferedImage base) {
        BufferedImage[] templates = new BufferedImage[TEMPLATE_COUNT];
        templates[0] = base;
        for (int i = 1; i < templates.length; i++) {
            int edge = BASE_SIZE - i * SIZE_STEP;
            templates[i] = scale(base, edge, edge);
        }
        return templates;
    }

    /**
     * Returns the given image unchanged.
     *
     * <p>The grayscale conversion was switched off because colors are compared directly; the
     * method is kept so that existing callers still compile. The previous body still built a
     * complete grayscale copy and then threw it away, which cost time on every capture.
     *
     * @param colorImage any image
     * @return {@code colorImage} itself
     */
    public BufferedImage biToGrayscale(BufferedImage colorImage) {
        return colorImage;
    }

    /**
     * Scales an image to the given size using bilinear interpolation.
     *
     * @param original the source image; it is not modified
     * @param newWidth target width in pixels
     * @param newHeight target height in pixels
     * @return a new image of {@code newWidth x newHeight} pixels
     */
    public BufferedImage rescaleBi(BufferedImage original, int newWidth, int newHeight) {
        return scale(original, newWidth, newHeight);
    }

    /** Static implementation behind {@link #rescaleBi(BufferedImage, int, int)}. */
    private static BufferedImage scale(BufferedImage original, int newWidth, int newHeight) {
        // TYPE_CUSTOM is not accepted by the (width, height, type) constructor; use ARGB instead.
        int type = original.getType() == BufferedImage.TYPE_CUSTOM
                ? BufferedImage.TYPE_INT_ARGB
                : original.getType();
        BufferedImage resized = new BufferedImage(newWidth, newHeight, type);
        Graphics2D g = resized.createGraphics();
        g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
        g.drawImage(original, 0, 0, newWidth, newHeight, 0, 0, original.getWidth(), original.getHeight(), null);
        g.dispose();
        return resized;
    }

    /**
     * Compares {@code small} with the equally sized region of {@code big} whose top-left corner is
     * {@code (a, b)}, using the squared distance of the red, green and blue channels. The alpha
     * channel is ignored.
     *
     * @param small the template
     * @param big the image that is searched
     * @param a x coordinate of the region in {@code big}
     * @param b y coordinate of the region in {@code big}
     * @return {@code 1 - (mean squared RGB distance / 195075)}; 1.0 for identical colors, 0.0 for
     *     the largest possible difference
     * @throws ArrayIndexOutOfBoundsException if the region does not lie completely inside {@code big}
     */
    public double similar(BufferedImage small, BufferedImage big, int a, int b) {
        int width = small.getWidth();
        int height = small.getHeight();
        int[] smallPixels = small.getRGB(0, 0, width, height, null, 0, width);
        int[] region = big.getRGB(a, b, width, height, null, 0, width);
        return similarity(smallPixels, width, height, region, width, 0, 0);
    }

    /**
     * Similarity on raw pixel arrays (row-major, one packed RGB int per pixel).
     *
     * <p>The squared distances are accumulated as an integer and normalized once at the end,
     * which avoids a floating-point division and rounding error per pixel.
     */
    private static double similarity(int[] small, int width, int height,
                                     int[] big, int bigStride, int a, int b) {
        long totalSquared = 0;
        for (int y = 0; y < height; y++) {
            int smallRow = y * width;
            int bigRow = (b + y) * bigStride + a;
            for (int x = 0; x < width; x++) {
                int p = small[smallRow + x];
                int q = big[bigRow + x];
                int dr = ((q >> 16) & 0xFF) - ((p >> 16) & 0xFF);
                int dg = ((q >> 8) & 0xFF) - ((p >> 8) & 0xFF);
                int db = (q & 0xFF) - (p & 0xFF);
                totalSquared += dr * dr + dg * dg + db * db;
            }
        }
        return 1.0 - totalSquared / (MAX_SQUARED_DISTANCE * width * height);
    }

    /**
     * One scanned position.
     *
     * @param index running number of the position within one scan
     * @param i index of the template in {@code moors}
     * @param x x coordinate of the top-left corner in the scaled capture
     * @param y y coordinate of the top-left corner in the scaled capture
     * @param sum similarity at that position (0..1)
     */
    record MR(long index, int i, int x, int y, double sum) {
    }

    /**
     * Scans {@code screen} on a grid of {@code step} pixels with one template and returns the
     * position with the highest similarity. On ties the position scanned first wins.
     *
     * <p>The pixels of both images are fetched once with a bulk {@code getRGB} call instead of one
     * call per pixel and position, and only the best candidate is kept instead of collecting and
     * sorting all of them.
     *
     * @param template the template to search for
     * @param templateIndex value stored in {@link MR#i()} of the result
     * @param screen the image that is searched
     * @param step grid distance in pixels, must be positive
     * @return the best position, or {@code null} if the template does not fit into {@code screen}
     * @throws IllegalArgumentException if {@code step} is not positive
     */
    static MR findBestMatch(BufferedImage template, int templateIndex, BufferedImage screen, int step) {
        if (step <= 0) {
            throw new IllegalArgumentException("step must be positive: " + step);
        }
        int screenWidth = screen.getWidth();
        int screenHeight = screen.getHeight();
        int[] screenPixels = screen.getRGB(0, 0, screenWidth, screenHeight, null, 0, screenWidth);
        int width = template.getWidth();
        int height = template.getHeight();
        int[] templatePixels = template.getRGB(0, 0, width, height, null, 0, width);

        MR best = null;
        long index = 0;
        for (int y = 0; y + height <= screenHeight; y += step) {
            for (int x = 0; x + width <= screenWidth; x += step) {
                double sum = similarity(templatePixels, width, height, screenPixels, screenWidth, x, y);
                long current = index++;
                if (best == null || sum > best.sum()) {
                    best = new MR(current, templateIndex, x, y, sum);
                }
            }
        }
        return best;
    }

    /**
     * Captures the top-left screen region, scales it down, searches it for the smallest template
     * and clicks the match if its similarity is at least {@link #MATCH_THRESHOLD}.
     *
     * @throws AWTException kept in the signature for existing callers
     */
    public void round() throws AWTException {
        System.out.println("round = " + System.currentTimeMillis());
        Rectangle rect = new Rectangle(0, 0, CAPTURE_SIZE, CAPTURE_SIZE);
        BufferedImage screenCapture = rescaleBi(robot.createScreenCapture(rect), SCALED_SIZE, SCALED_SIZE);

        // Only the smallest template is used; it roughly matches the bird size after downscaling.
        int templateIndex = moors.length - 1;
        MR first = findBestMatch(moors[templateIndex], templateIndex, screenCapture, SCAN_STEP);
        if (first == null || first.sum() < MATCH_THRESHOLD) {
            return;
        }
        Point point = middle(first);
        System.out.println("first = " + first);
        System.out.println("point = " + point);
        robot.mouseMove(point.x, point.y);
        robot.mousePress(InputEvent.BUTTON1_DOWN_MASK);
        robot.mouseRelease(InputEvent.BUTTON1_DOWN_MASK);
    }

    /**
     * Converts a match in the scaled capture to the screen position to click on.
     *
     * @param m a match found in the scaled capture
     * @return {@code (x * DOWNSCALE + CLICK_OFFSET, y * DOWNSCALE + CLICK_OFFSET)}
     */
    public Point middle(MR m) {
        return new Point(m.x() * DOWNSCALE + CLICK_OFFSET, m.y() * DOWNSCALE + CLICK_OFFSET);
    }

    public static void main(String[] args) throws IOException, InterruptedException, AWTException {
        new Main();
    }
}

Wenn ihr dennoch ein „besseres“ Verfahren kennt, würde mich das natürlich auch interessieren.

Ich bin mir nicht sicher, wie das in OpenCV zum Beispiel implementiert ist.

Ich habe da (unbewusst) den mean squared error between two rectangular images (a small and a big one) berechnet. Siehe hier:

Es gäbe aber noch die „fourier transformation to the problem of subimage finding“.

Das Problem wäre aber weiterhin noch folgendes:

  1. Das Moorhuhn (sub image) ist unterschiedlich groß,
  2. der Hintergrund ändert sich.

Könnte das mit der Fourier-Transformation gelöst werden? Und Danke…