Quellcode durchsuchen

[bin] Update where we store media

Colin Powell vor 5 Jahren
Ursprung
Commit
c3bece9e55

+ 2 - 2
bin/.bin/changepaper

@@ -16,8 +16,8 @@ else
 fi
 
 # Choices: astrobin,natgeo,nasa,unsplash,bing
-BASEDIR="$HOME/var/backgrounds/$SOURCE/"
-SEARX_BASEDIR="$HOME/var/backgrounds/bing/"
+BASEDIR="$HOME/var/media/backgrounds/$SOURCE/"
+SEARX_BASEDIR="$HOME/var/media/backgrounds/bing/"
 
 # Get daily NatGeo POTD
 python3 ~/.bin/get_natgeo_potd.py

+ 1 - 1
bin/.bin/get_astrobin_potd.py

@@ -7,7 +7,7 @@ import requests
 
 today = datetime.today().strftime("%Y-%m-%d")
 home = os.path.expanduser("~")
-target_path = f"{home}/var/backgrounds/astrobin/{today}.jpg"
+target_path = f"{home}/var/media/backgrounds/astrobin/{today}.jpg"
 
 # If the file for today already exists, just exit
 if os.path.isfile(target_path):

+ 1 - 1
bin/.bin/get_bing_potd.py

@@ -7,7 +7,7 @@ import requests
 
 today = datetime.today().strftime("%Y-%m-%d")
 home = os.path.expanduser("~")
-target_path = f"{home}/var/backgrounds/bing/{today}.jpg"
+target_path = f"{home}/var/media/backgrounds/bing/{today}.jpg"
 
 # If the file for today already exists, just exit
 if os.path.isfile(target_path):

+ 1 - 1
bin/.bin/get_natgeo_potd.py

@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
 
 today = datetime.today().strftime("%Y-%m-%d")
 home = os.path.expanduser("~")
-target_path = f"{home}/var/backgrounds/natgeo/{today}.jpg"
+target_path = f"{home}/var/media/backgrounds/natgeo/{today}.jpg"
 
 # If the file for today already exists, just exit
 if os.path.isfile(target_path):

+ 1 - 1
bin/.bin/get_unsplash_potd.py

@@ -7,7 +7,7 @@ import requests
 
 today = datetime.today().strftime("%Y-%m-%d")
 home = os.path.expanduser("~")
-target_path = f"{home}/var/backgrounds/unsplash/{today}.jpg"
+target_path = f"{home}/var/media/backgrounds/unsplash/{today}.jpg"
 
 # If the file for today already exists, just exit
 if os.path.isfile(target_path):

+ 491 - 0
bin/.bin/imapdedup

@@ -0,0 +1,491 @@
+#! /usr/bin/env python3
+#
+#  imapdedup.py
+#
+#  Looks for duplicate messages in a set of IMAP mailboxes and removes all but the first.
+#  Comparison is normally based on the Message-ID header.
+#
+#  Default behaviour is purely to mark the duplicates as deleted.  Some mail clients
+#  will allow you to view these and undelete them if you change your mind.
+#
+#  Copyright (c) 2013-2020 Quentin Stafford-Fraser.
+#  All rights reserved, subject to the following:
+#
+#
+#   This is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This software is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this software; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+#   USA.
+#
+
+import getpass
+import hashlib
+import imaplib
+import os
+import optparse
+import re
+import socket
+import sys
+from typing import List, Dict, Tuple, Optional, Union, Type, Any
+
+from email.parser import BytesParser
+from email.message import Message
+from email.errors import HeaderParseError
+from email.header import decode_header
+
+# Increase the rather small limit on result line-length
+# imposed in certain imaplib versions.
+# imaplib._MAXLINE = max(2000000, imaplib._MAXLINE)
+
+
+class ImapDedupException(Exception):
+    """Raised when the IMAP server answers a command with a status other than OK."""
+
+
+# IMAP responses should normally begin 'OK' - we strip that off
+def check_response(resp: Tuple[str, List[bytes]]):
+    """
+    Unpack an imaplib (status, data) reply and hand back the data part.
+
+    Raises ImapDedupException when the status is anything other than "OK".
+    """
+    outcome, payload = resp
+    if outcome == "OK":
+        return payload
+    raise ImapDedupException("Got response: %s from server" % str(payload))
+
+
+def get_arguments(args: List[str]) -> Tuple[optparse.Values, List[str]]:
+    """
+    Parse the command line into (options, mailbox names).
+
+    Prints an error and exits with status 1 when neither a server/user pair
+    nor a process is given, or when -m is used without -c.  If no password
+    was supplied (and no process is used) it is read from the
+    IMAPDEDUP_PASSWORD environment variable, falling back to a prompt.
+    """
+    cli = optparse.OptionParser(usage="%prog [options] <mailboxname> [<mailboxname> ...]")
+
+    # One row per option, in the order they should appear in --help.
+    option_table = [
+        (("-P", "--process"), dict(dest="process", help="IMAP process to access mailboxes")),
+        (("-s", "--server"), dict(dest="server", help="IMAP server")),
+        (("-p", "--port"), dict(dest="port", help="IMAP server port", type="int")),
+        (("-x", "--ssl"), dict(dest="ssl", action="store_true", help="Use SSL")),
+        (
+            ("-X", "--starttls"),
+            dict(dest="starttls", action="store_true", help="Require STARTTLS"),
+        ),
+        (("-u", "--user"), dict(dest="user", help="IMAP user name")),
+        (
+            ("-w", "--password"),
+            dict(dest="password", help="IMAP password (Will prompt if not specified)"),
+        ),
+        (("-v", "--verbose"), dict(dest="verbose", action="store_true", help="Verbose mode")),
+        (
+            ("-n", "--dry-run"),
+            dict(
+                dest="dry_run",
+                action="store_true",
+                help="Don't actually do anything, just report what would be done",
+            ),
+        ),
+        (
+            ("-c", "--checksum"),
+            dict(
+                dest="use_checksum",
+                action="store_true",
+                help="Use a checksum of several mail headers, instead of the Message-ID",
+            ),
+        ),
+        (
+            ("-m", "--checksum-with-id"),
+            dict(
+                dest="use_id_in_checksum",
+                action="store_true",
+                help="Include the Message-ID (if any) in the -c checksum.",
+            ),
+        ),
+        (
+            ("", "--no-close"),
+            dict(
+                dest="no_close",
+                action="store_true",
+                help='Do not "close" mailbox when done. Some servers will purge deleted messages on a close command.',
+            ),
+        ),
+        (
+            ("-l", "--list"),
+            dict(dest="just_list", action="store_true", help="Just list mailboxes"),
+        ),
+    ]
+    for flag_names, settings in option_table:
+        cli.add_option(*flag_names, **settings)
+
+    cli.set_defaults(
+        verbose=False, ssl=False, dry_run=False, no_close=False, just_list=False
+    )
+    options, mboxes = cli.parse_args(args)
+
+    # Without a process to talk to, both a server and a user are mandatory.
+    if not options.process and not (options.server and options.user):
+        sys.stderr.write(
+            "\nError: Must specify server, user, and at least one mailbox.\n\n"
+        )
+        cli.print_help()
+        sys.exit(1)
+
+    if not (options.password or options.process):
+        # Read from IMAPDEDUP_PASSWORD env variable, or prompt for one.
+        options.password = os.getenv("IMAPDEDUP_PASSWORD") or getpass.getpass()
+
+    if options.use_id_in_checksum and not options.use_checksum:
+        sys.stderr.write("\nError: If you use -m you must also use -c.\n")
+        sys.exit(1)
+
+    return options, mboxes
+
+
+# Thanks to http://www.doughellmann.com/PyMOTW/imaplib/
+# Matches one line of a LIST reply: the parenthesised flags, the
+# double-quoted hierarchy delimiter, and the remainder as the mailbox name.
+list_response_pattern = re.compile(
+    rb'\((?P<flags>.*?)\) "(?P<delimiter>.*)" (?P<name>.*)'
+)
+
+
+def parse_list_response(line: bytes):
+    """
+    Split one LIST reply line into (flags, delimiter, mailbox_name), all bytes.
+
+    Surrounding double quotes are stripped from the mailbox name.  A line
+    that does not match list_response_pattern is reported on stderr and the
+    program exits with status 1.
+    """
+    matched = list_response_pattern.match(line)
+    if matched is None:
+        sys.stderr.write("\nError: parsing list response '{}'".format(str(line)))
+        sys.exit(1)
+    flags = matched.group("flags")
+    delimiter = matched.group("delimiter")
+    mailbox_name = matched.group("name").strip(b'"')
+    return (flags, delimiter, mailbox_name)
+
+
+def str_header(parsed_message: Message, name: str) -> str:
+    """
+    Return the value (of the first instance, if more than one) of
+    the given header, as a unicode string.
+
+    RFC 2047 encoded words are decoded using the charset they declare, and
+    every decoded chunk of the header is included in the result.  Bytes
+    with no charset, or with a charset Python does not know, are decoded
+    as UTF-8 with undecodable bytes dropped.  A missing header yields "".
+    """
+    pieces = []
+    for chunk, charset in decode_header(parsed_message.get(name, "")):
+        if isinstance(chunk, str):
+            # decode_header returns str when the header has no encoded words
+            pieces.append(chunk)
+            continue
+        try:
+            pieces.append(chunk.decode(charset or "utf-8", "ignore"))
+        except LookupError:
+            # Unknown or misspelt charset label: fall back rather than fail
+            pieces.append(chunk.decode("utf-8", "ignore"))
+    return "".join(pieces)
+
+
+def get_message_id(
+    parsed_message: Message, options_use_checksum=False, options_use_id_in_checksum=False
+) -> Optional[str]:
+    """
+    Return the identifier used to recognise duplicates of this message.
+
+    By default this is the Message-ID header; if the message has none, a
+    warning is printed and None is returned.
+
+    With options_use_checksum, the identifier is instead the md5 hex digest
+    of the From, To, Subject, Date, Cc and Bcc headers.  The Message-ID is
+    added to the digest only if options_use_id_in_checksum is also set.
+    md5 is not collision-free, so a dry run and a review of the results is
+    advisable before deleting anything.
+
+    If the headers cannot be parsed, a warning describing the message is
+    printed and None is returned.
+    """
+    try:
+        if not options_use_checksum:
+            header_id = str_header(parsed_message, "Message-ID")
+            if header_id:
+                return header_id
+            print(
+                "Message '%s' dated '%s' has no Message-ID header."
+                % (
+                    str_header(parsed_message, "Subject"),
+                    str_header(parsed_message, "Date"),
+                )
+            )
+            print("You might want to use the -c option.")
+            return None
+
+        # Checksum mode: fold each header into the digest as "Name:value".
+        hashed_headers = ["From", "To", "Subject", "Date", "Cc", "Bcc"]
+        if options_use_id_in_checksum:
+            hashed_headers.append("Message-ID")
+        digest = hashlib.md5()
+        for header_name in hashed_headers:
+            labelled = header_name + ":" + str_header(parsed_message, header_name)
+            digest.update(labelled.encode())
+        return digest.hexdigest()
+    except (ValueError, HeaderParseError):
+        print(
+            "WARNING: There was an exception trying to parse the headers of this message."
+        )
+        print("It may be corrupt, and you might consider deleting it.")
+        summary = "Subject: %s\nFrom: %s\nDate: %s\n" % (
+            parsed_message["Subject"],
+            parsed_message["From"],
+            parsed_message["Date"],
+        )
+        print(summary)
+        print("Message skipped.")
+        return None
+
+
+def get_mailbox_list(server: imaplib.IMAP4) -> List[str]:
+    """
+    Return a list of usable mailbox names
+
+    Mailboxes flagged \\Noselect in the LIST reply are left out.
+    Raises ImapDedupException (via check_response) if LIST fails.
+    """
+    names = []
+    for entry in check_response(server.list()):
+        if entry is None:
+            # imaplib hands back [None] when the server listed nothing
+            continue
+        flags, _delimiter, mailbox_name = parse_list_response(entry)
+        # On the wire the flag is ONE backslash followed by "Noselect", so the
+        # needle must contain one backslash too.  Flag names are compared
+        # case-insensitively.
+        if b"\\noselect" in flags.lower():
+            continue
+        names.append(mailbox_name.decode())
+    return names
+
+def get_deleted_msgnums(server: imaplib.IMAP4) -> List[int]:
+    """
+    Return a list of ids of deleted messages in the folder.
+
+    Raises ImapDedupException (via check_response) if the search fails.
+    """
+    deleted_info = check_response(server.search(None, "DELETED"))
+    # The first item should be a space-separated list of msg ids.  It is
+    # empty when nothing matched, and imaplib supplies None in its place
+    # when the server sent no untagged SEARCH reply at all.
+    if not deleted_info or not deleted_info[0]:
+        return []
+    return [int(n) for n in deleted_info[0].split()]
+
+def get_undeleted_msgnums(server: imaplib.IMAP4) -> List[int]:
+    """
+    Return a list of ids of non-deleted messages in the folder.
+
+    Raises ImapDedupException (via check_response) if the search fails.
+    """
+    undeleted_info = check_response(server.search(None, "UNDELETED"))
+    # The first item should be a space-separated list of msg ids.  It is
+    # empty when nothing matched, and imaplib supplies None in its place
+    # when the server sent no untagged SEARCH reply at all.
+    if not undeleted_info or not undeleted_info[0]:
+        return []
+    return [int(n) for n in undeleted_info[0].split()]
+
+
+def mark_messages_deleted(server: imaplib.IMAP4, msgs_to_delete: List[int]):
+    """
+    Add the \\Deleted flag to every message number in msgs_to_delete,
+    using a single STORE command.
+
+    Does nothing for an empty list, since a STORE with an empty message
+    set is not a valid command.  Raises ImapDedupException (via
+    check_response) if the server rejects the STORE.
+    """
+    if not msgs_to_delete:
+        return
+    message_ids = ",".join(map(str, msgs_to_delete))
+    check_response(
+        server.store(message_ids, "+FLAGS", r"(\Deleted)")
+    )
+
+def get_msg_headers(server: imaplib.IMAP4, msg_ids: List[int]) -> List[Tuple[int, bytes]]:
+    """
+    Fetch the raw header block of each message in the list of provided IDs.
+    Return a list of tuples:  [ (msgid, header_bytes), (msgid, header_bytes)... ]
+    The returned header_bytes can be parsed by email.parser.BytesParser.
+
+    Each header block is paired with the message number the server reported
+    for it, not with its position in the reply, so extra or missing items
+    in the reply cannot attach headers to the wrong message.
+
+    Raises ImapDedupException if the FETCH fails or a reply item does not
+    start with a message number.
+    """
+    if not msg_ids:
+        return []
+
+    # Get the header info for each message
+    message_ids_str = ",".join(map(str, msg_ids))
+    ms = check_response(server.fetch(message_ids_str, "(RFC822.HEADER)"))
+
+    requested = {int(m) for m in msg_ids}
+    resp: List[Tuple[int, bytes]] = []
+    for item in ms:
+        # Header literals arrive as (envelope, data) tuples; anything else
+        # (such as the closing b")") carries no header data.
+        if not isinstance(item, tuple):
+            continue
+        envelope, hinfo = item[0], item[1]
+        try:
+            # The envelope looks like b"12 (RFC822.HEADER {345}"
+            mnum = int(envelope.split(None, 1)[0])
+        except (ValueError, IndexError):
+            raise ImapDedupException(
+                "Unexpected FETCH response: %r" % (envelope,)
+            ) from None
+        if mnum in requested:
+            resp.append((mnum, hinfo))
+    return resp
+
+
+def print_message_info(parsed_message: Message):
+    """Print the From, To, Cc, Bcc, Subject and Date headers, then a blank line."""
+    for header_name in ("From", "To", "Cc", "Bcc", "Subject", "Date"):
+        print(header_name + ": " + str_header(parsed_message, header_name))
+    print("")
+
+
+# This actually does the work
+def process(options, mboxes: List[str]):
+    """
+    Connect to the IMAP server and mark duplicate messages as deleted.
+
+    `options` is the values object produced by get_arguments(); `mboxes` is
+    the list of mailbox names to scan, in order.  A message counts as a
+    duplicate when its identifier (see get_message_id) was already seen
+    earlier in the same or a previous mailbox, so the first occurrence is
+    the one that is kept.  With options.dry_run the mailboxes are selected
+    read-only and nothing is flagged; with options.just_list the mailbox
+    names are printed and nothing else is done.
+
+    A failed connection, a server without STARTTLS when it was required, a
+    failed login, or an empty mailbox list are reported on stderr and end
+    the program with exit status 1.  An ImapDedupException raised while
+    scanning is reported on stderr and stops the scan.
+    """
+    serverclass: Type[Any]
+    if options.process:
+        serverclass = imaplib.IMAP4_stream
+    elif options.ssl:
+        serverclass = imaplib.IMAP4_SSL
+    else:
+        serverclass = imaplib.IMAP4
+
+    try:
+        if options.process:
+            server = serverclass(options.process)
+        elif options.port:
+            server = serverclass(options.server, options.port)
+        else:
+            # Use the default, which will be different depending on SSL choice
+            server = serverclass(options.server)
+    except socket.error as e:
+        sys.stderr.write(
+            "\nFailed to connect to server. Might be host, port or SSL settings?\n"
+        )
+        sys.stderr.write("%s\n\n" % e)
+        sys.exit(1)
+
+    if ("STARTTLS" in server.capabilities) and hasattr(server, "starttls"):
+        server.starttls()
+    elif options.starttls:
+        sys.stderr.write("\nError: Server did not offer TLS\n")
+        sys.exit(1)
+    elif not options.ssl:
+        sys.stderr.write("\nWarning: Unencrypted connection\n")
+
+    try:
+        if not options.process:
+            server.login(options.user, options.password)
+    except Exception as e:
+        # Not a bare "except:": Ctrl-C and sys.exit() must not be reported
+        # as a failed login.  The reason is shown to help diagnosis.
+        sys.stderr.write("\nError: Login failed\n")
+        sys.stderr.write("%s\n\n" % e)
+        sys.exit(1)
+
+    # List mailboxes option
+    # Just do that and then exit
+    if options.just_list:
+        try:
+            for mb in get_mailbox_list(server):
+                print(mb)
+        finally:
+            # Log out on this path too, so the connection is not left open
+            server.logout()
+        return
+
+    if len(mboxes) == 0:
+        sys.stderr.write("\nError: Must specify mailbox\n")
+        sys.exit(1)
+
+    # OK - let's get started.
+    # Iterate through a set of named mailboxes and delete the later messages discovered.
+    try:
+        parser = BytesParser()  # can be the same for all mailboxes
+        # Map of previously seen message IDs, in any mailbox, to the
+        # "<mailbox>_<number>" label of the message that was seen first
+        msg_ids: Dict[str, str] = {}
+        for mbox in mboxes:
+            msgs_to_delete = []  # should be reset for each mbox
+            msg_map = {}  # should be reset for each mbox
+
+            # Make sure mailbox name is surrounded by quotes if it contains a space
+            if " " in mbox and (mbox[0] != '"' or mbox[-1] != '"'):
+                mbox = '"' + mbox + '"'
+
+            # Select the mailbox
+            msgs = check_response(server.select(mailbox=mbox, readonly=options.dry_run))[0]
+            print("There are %d messages in %s." % (int(msgs), mbox))
+
+            # Check how many messages are already marked 'deleted'...
+            numdeleted = len(get_deleted_msgnums(server))
+            print(
+                "%s message(s) currently marked as deleted in %s"
+                % (numdeleted or "No", mbox)
+            )
+
+            # Now get a list of the ones that aren't deleted.
+            # That's what we'll actually use.
+            msgnums = get_undeleted_msgnums(server)
+            print("%s others in %s" % (len(msgnums), mbox))
+
+            chunkSize = 100
+            if options.verbose:
+                print("Reading the others... (in batches of %d)" % chunkSize)
+
+            for i in range(0, len(msgnums), chunkSize):
+                if options.verbose:
+                    print("Batch starting at item %d" % i)
+
+                # and parse them.
+                for mnum, hinfo in get_msg_headers(server, msgnums[i: i + chunkSize]):
+                    # Parse the header info into a Message object
+                    mp = parser.parsebytes(hinfo)
+
+                    if options.verbose:
+                        print("Checking %s message %s" % (mbox, mnum))
+                        # Store message only when verbose is enabled (to print it later on)
+                        msg_map[mnum] = mp
+
+                    # Record the message-ID header (or generate one from other headers)
+                    msg_id = get_message_id(
+                        mp, options.use_checksum, options.use_id_in_checksum
+                    )
+
+                    if msg_id:
+                        # If we've seen this message before, record it as one to be
+                        # deleted in this mailbox.
+                        if msg_id in msg_ids:
+                            print(
+                                "Message %s_%s is a duplicate of %s and %s be marked as deleted"
+                                % (
+                                    mbox, mnum, msg_ids[msg_id],
+                                    "would" if options.dry_run else "will",
+                                )
+                            )
+                            if options.verbose:
+                                print(
+                                    "Subject: %s\nFrom: %s\nDate: %s\n"
+                                    % (mp["Subject"], mp["From"], mp["Date"])
+                                )
+                            msgs_to_delete.append(mnum)
+                        # Otherwise just record the fact that we've seen it
+                        else:
+                            msg_ids[msg_id] = f"{mbox}_{mnum}"
+
+                print(
+                    (
+                        "%s message(s) in %s processed"
+                        % (min(len(msgnums), i + chunkSize), mbox)
+                    )
+                )
+
+            # OK - we've been through this mailbox, and msgs_to_delete holds
+            # a list of the duplicates we've found.
+
+            if len(msgs_to_delete) == 0:
+                print("No duplicates were found in %s" % mbox)
+
+            else:
+                if options.verbose:
+                    print("These are the duplicate messages: ")
+                    for mnum in msgs_to_delete:
+                        print_message_info(msg_map[mnum])
+
+                if options.dry_run:
+                    print(
+                        "If you had NOT selected the 'dry-run' option,\n"
+                        "  %i messages would now be marked as 'deleted'."
+                        % (len(msgs_to_delete))
+                    )
+
+                else:
+
+                    print("Marking %i messages as deleted..." % (len(msgs_to_delete)))
+                    # Deleting messages one at a time can be slow if there are many,
+                    # so we batch them up.
+                    deleteChunkSize = 30
+                    if options.verbose:
+                        print("(in batches of %d)" % deleteChunkSize)
+                    for i in range(0, len(msgs_to_delete), deleteChunkSize):
+                        mark_messages_deleted(server, msgs_to_delete[i: i + deleteChunkSize])
+                        if options.verbose:
+                            print("Batch starting at item %d marked." % i)
+                    print("Confirming new numbers...")
+                    numdeleted = len(get_deleted_msgnums(server))
+                    numundel = len(get_undeleted_msgnums(server))
+                    print(
+                        "There are now %s messages marked as deleted and %s others in %s."
+                        % (numdeleted, numundel, mbox)
+                    )
+
+        if not options.no_close:
+            server.close()
+
+    except ImapDedupException as e:
+        print("Error:", e, file=sys.stderr)
+    finally:
+        server.logout()
+
+def main(args: List[str]):
+    """Parse the given command-line arguments and run the de-duplication."""
+    parsed_options, mailbox_names = get_arguments(args)
+    process(parsed_options, mailbox_names)
+
+
+# Run as a script: hand everything after the program name to main().
+if __name__ == "__main__":
+    main(sys.argv[1:])