123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656 |
- from base64 import b64encode
- from collections import namedtuple
- from functools import reduce
- import json
- import operator
- import logging
- from uuid import uuid4
- import requests
- from requests.exceptions import RequestException
- from Crypto.PublicKey import RSA
- from Crypto.Signature import pkcs1_15
- from Crypto.Hash import SHA256
- from django.apps import apps
- from django.core.paginator import Paginator
- from django.db.models import Q
- from django.utils.http import http_date
- from urllib.parse import urlparse
- from django.apps import apps
- from django.db import models
- from django.utils.translation import gettext_lazy as _
- from bookwyrm import activitypub
- from bookwyrm.settings import USER_AGENT, PAGE_LENGTH
- from bookwyrm.signatures import make_signature, make_digest
- from bookwyrm.tasks import app, MEDIUM
- from bookwyrm.models.fields import ImageField, ManyToManyField
# module-level logger, named after this module
logger = logging.getLogger(__name__)

# minimal stand-in for a model field: it carries nothing but the
# ``set_activity_from_field`` callable
PropertyField = namedtuple("PropertyField", ["set_activity_from_field"])
# pylint: disable=invalid-name
def set_activity_from_property_field(activity, obj, field):
    """Copy one attribute of ``obj`` into the activity json.

    ``field`` is indexable: item 0 is the attribute name to read from ``obj``
    and item 1 is the key to write into ``activity``.
    """
    value = getattr(obj, field[0])
    activity[field[1]] = value
class ActivityPubMixin:
    """A model mixin which allows serialization for the ActivityPub standard

    Largely derived from code from Bookwyrm"""

    # placeholder default; it accepts no arguments, so presumably concrete
    # models replace it with a real serializer -- TODO confirm
    activity_serializer = lambda: {}
    reverse_unfurl = False

    def __init__(self, *args, **kwargs):
        """collect some info on model fields for later use"""
        self.image_fields = []
        self.many_to_many_fields = []
        self.simple_fields = []  # "simple"
        # sort model fields by type
        for field in self._meta.get_fields():
            if not hasattr(field, "field_to_activity"):
                continue

            if isinstance(field, ImageField):
                self.image_fields.append(field)
            elif isinstance(field, ManyToManyField):
                self.many_to_many_fields.append(field)
            else:
                self.simple_fields.append(field)

        # a list of allll the serializable fields
        self.activity_fields = (
            self.image_fields + self.many_to_many_fields + self.simple_fields
        )
        if hasattr(self, "property_fields"):
            self.activity_fields += [
                # bind ``f`` as a default argument: a plain closure would be
                # late-bound and every lambda would see the LAST property field
                PropertyField(
                    lambda a, o, f=f: set_activity_from_property_field(a, o, f)
                )
                for f in self.property_fields
            ]

        # these are separate to avoid infinite recursion issues
        self.deserialize_reverse_fields = (
            self.deserialize_reverse_fields
            if hasattr(self, "deserialize_reverse_fields")
            else []
        )
        self.serialize_reverse_fields = (
            self.serialize_reverse_fields
            if hasattr(self, "serialize_reverse_fields")
            else []
        )

        super().__init__(*args, **kwargs)

    @classmethod
    def find_existing_by_remote_id(cls, remote_id):
        """look up a remote id in the db"""
        return cls.find_existing({"id": remote_id})

    @classmethod
    def find_existing(cls, data, remote_id=None):
        """compare data to fields that can be used for deduplication.

        This always includes remote_id, but can also be unique identifiers
        like an isbn for an edition.

        ``data`` is the activity json (a dict). ``remote_id`` is optional and
        is only used as the ``"id"`` value when ``data`` does not carry one.
        Returns the first matching instance, or ``None`` when nothing matches
        or when no deduplication value is available.
        """
        if data is None:
            data = {}
        if remote_id is not None and not data.get("id"):
            # work on a copy so the caller's dict is left untouched
            data = {**data, "id": remote_id}

        filters = []
        # grabs all the data from the model to create django queryset filters
        for field in cls._meta.get_fields():
            if not getattr(field, "deduplication_field", False):
                continue

            value = data.get(field.get_activitypub_field())
            if not value:
                continue
            filters.append({field.name: value})

        if hasattr(cls, "origin_id") and "id" in data:
            # kinda janky, but this handles special case for books
            filters.append({"origin_id": data["id"]})

        if not filters:
            # if there are no deduplication fields, it will match the first
            # item no matter what. this shouldn't happen but just in case.
            return None

        objects = cls.objects
        if hasattr(objects, "select_subclasses"):
            objects = objects.select_subclasses()

        # an OR operation on all the match fields, sorry for the dense syntax
        match = objects.filter(reduce(operator.or_, (Q(**f) for f in filters)))
        # there OUGHT to be only one match
        return match.first()

    def broadcast(self, activity, sender, software=None, queue=MEDIUM):
        """Broadcast an activity via an asynchronous task

        The activity is serialized to json here; the recipient inbox urls are
        computed up front and handed to the task together with the sender id.
        """
        broadcast_task.apply_async(
            args=(
                sender.id,
                json.dumps(activity, cls=activitypub.ActivityEncoder),
                self.get_recipients(software=software),
            ),
            queue=queue,
        )

    def get_recipients(self, software=None):
        """figure out which inbox urls to post to

        Returns a de-duplicated list of inbox urls. ``software`` optionally
        restricts followers to bookwyrm (``"bookwyrm"``) or non-bookwyrm
        (any other truthy value) users.
        """
        # first we have to figure out who should receive this activity
        privacy = self.privacy if hasattr(self, "privacy") else "public"
        # is this activity owned by a user (statuses, lists, shelves), or is it
        # general to the instance (like books)
        user = self.user if hasattr(self, "user") else None
        user_model = apps.get_model("bookwyrm.User", require_ready=True)
        if not user and isinstance(self, user_model):
            # or maybe the thing itself is a user
            user = self
        # find anyone who's tagged in a status, for example
        mentions = self.recipients if hasattr(self, "recipients") else []

        # we always send activities to explicitly mentioned users' inboxes
        recipients = [u.inbox for u in mentions or [] if not u.local]

        # unless it's a dm, all the followers should receive the activity
        if privacy != "direct":
            # we will send this out to a subset of all remote users
            queryset = (
                user_model.viewer_aware_objects(user)
                .filter(
                    local=False,
                )
                .distinct()
            )
            # filter users first by whether they're using the desired software
            # this lets us send book updates only to other bw servers
            if software:
                queryset = queryset.filter(
                    bookwyrm_user=(software == "bookwyrm")
                )
            # if there's a user, we only want to send to the user's followers
            if user:
                queryset = queryset.filter(following=user)

            # ideally, we will send to shared inboxes for efficiency
            shared_inboxes = (
                queryset.filter(shared_inbox__isnull=False)
                .values_list("shared_inbox", flat=True)
                .distinct()
            )
            # but not everyone has a shared inbox
            inboxes = queryset.filter(shared_inbox__isnull=True).values_list(
                "inbox", flat=True
            )
            recipients += list(shared_inboxes) + list(inboxes)
        return list(set(recipients))

    def to_activity_dataclass(self, **kwargs):  # pylint: disable=unused-argument
        """convert from a model to an activity

        Extra keyword arguments are accepted and ignored so callers that
        forward options (such as ``pure``) do not fail on this base version.
        """
        activity = generate_activity(self)
        return self.activity_serializer(**activity)

    def to_activity(self, **kwargs):  # pylint: disable=unused-argument
        """convert from a model to a json activity"""
        return self.to_activity_dataclass().serialize()


# alternate spelling kept as an alias so that either name resolves
ActivitypubMixin = ActivityPubMixin
class ObjectMixin(ActivityPubMixin):
    """add this mixin for object models that are AP serializable"""

    def save(
        self, *args, created=None, software=None, priority=MEDIUM, **kwargs
    ):
        """broadcast created/updated/deleted objects as appropriate

        ``broadcast`` (keyword, default True) turns federation off for this
        save; ``created`` forces the object to be treated as new; ``software``
        and ``priority`` are passed on to ``broadcast`` as the target software
        filter and the task queue.
        """
        # this bonus kwarg would cause an error in the base save method
        broadcast = kwargs.pop("broadcast", True)

        created = created or not bool(self.id)
        # first off, we want to save normally no matter what
        super().save(*args, **kwargs)
        if not broadcast or (
            hasattr(self, "status_type") and self.status_type == "Announce"
        ):
            return

        # this will work for objects owned by a user (lists, shelves)
        user = self.user if hasattr(self, "user") else None

        if created:
            # broadcast Create activities for objects owned by a local user
            if not user or not user.local:
                return

            try:
                # do we have a "pure" activitypub version of this for mastodon?
                if software != "bookwyrm" and hasattr(self, "pure_content"):
                    pure_activity = self.to_create_activity(user, pure=True)
                    self.broadcast(
                        pure_activity, user, software="other", queue=priority
                    )
                    # set bookwyrm so that that type is also sent
                    software = "bookwyrm"
                # sends to BW only if we just did a pure version for masto
                activity = self.to_create_activity(user)
                self.broadcast(
                    activity, user, software=software, queue=priority
                )
            except AttributeError:
                # janky as heck, this catches the multiple inheritance chain
                # for boosts and ignores this auxiliary broadcast
                return
            return

        # --- updating an existing object
        if not user:
            # users don't have associated users, they ARE users
            user_model = apps.get_model("bookwyrm.User", require_ready=True)
            if isinstance(self, user_model):
                user = self
            # book data tracks last editor
            user = user or getattr(self, "last_edited_by", None)
        # again, if we don't know the user or they're remote, don't bother
        if not user or not user.local:
            return

        # is this a deletion?
        if hasattr(self, "deleted") and self.deleted:
            activity = self.to_delete_activity(user)
        else:
            activity = self.to_update_activity(user)
        self.broadcast(activity, user, queue=priority)

    def to_create_activity(self, user, **kwargs):
        """returns the object wrapped in a Create activity

        When the serialized object has non-empty ``content``, that content is
        signed with the user's private key and attached as a signature.
        """
        activity_object = self.to_activity_dataclass(**kwargs)

        signature = None
        create_id = self.remote_id + "/activity"
        if hasattr(activity_object, "content") and activity_object.content:
            signer = pkcs1_15.new(RSA.import_key(user.key_pair.private_key))
            content = activity_object.content
            signed_message = signer.sign(SHA256.new(content.encode("utf8")))

            signature = activitypub.Signature(
                creator=f"{user.remote_id}#main-key",
                created=activity_object.published,
                signatureValue=b64encode(signed_message).decode("utf8"),
            )

        return activitypub.Create(
            id=create_id,
            actor=user.remote_id,
            to=activity_object.to,
            cc=activity_object.cc,
            object=activity_object,
            signature=signature,
        ).serialize()

    def to_delete_activity(self, user):
        """notice of deletion"""
        return activitypub.Delete(
            id=self.remote_id + "/activity",
            actor=user.remote_id,
            to=[f"{user.remote_id}/followers"],
            cc=["https://www.w3.org/ns/activitystreams#Public"],
            object=self,
        ).serialize()

    def to_update_activity(self, user):
        """wrapper for Updates to an activity"""
        # a fresh uuid gives every update activity its own id
        uuid = uuid4()
        return activitypub.Update(
            id=f"{self.remote_id}#update/{uuid}",
            actor=user.remote_id,
            to=["https://www.w3.org/ns/activitystreams#Public"],
            object=self,
        ).serialize()
class OrderedCollectionPageMixin(ObjectMixin):
    """pagination helpers only, for models (ie, an outbox) that want
    ordered collections without overriding the base to_activity"""

    @property
    def collection_remote_id(self):
        """override when the collection has its own remote id, ie outbox"""
        return self.remote_id

    def to_ordered_collection(
        self,
        queryset,
        remote_id=None,
        page=False,
        collection_only=False,
        **kwargs,
    ):
        """serialize a queryset as an ordered collection, or one page of it

        Raises RuntimeError when the queryset is unordered. A truthy ``page``
        returns just that page; otherwise the collection summary is returned.
        """
        if not queryset.ordered:
            raise RuntimeError("queryset must be ordered")

        remote_id = remote_id or self.remote_id

        if page:
            # the page may arrive as a list; only its first entry is used
            if isinstance(page, list) and page:
                page = page[0]
            return to_ordered_collection_page(
                queryset, remote_id, page=page, **kwargs
            )

        use_generic = collection_only or not hasattr(
            self, "activity_serializer"
        )
        if use_generic:
            serializer, activity = activitypub.OrderedCollection, {}
        else:
            serializer = self.activity_serializer
            # start from the model's own fields
            activity = generate_activity(self)

        if remote_id:
            activity["id"] = remote_id

        paginator = Paginator(queryset, PAGE_LENGTH)
        # computed fields that only exist on ordered collections
        activity.update(
            {
                "totalItems": paginator.count,
                "first": f"{remote_id}?page=1",
                "last": f"{remote_id}?page={paginator.num_pages}",
            }
        )
        return serializer(**activity)
class OrderedCollectionMixin(OrderedCollectionPageMixin):
    """lets an activitypub model behave as an ordered collection"""

    activity_serializer = activitypub.OrderedCollection

    @property
    def collection_queryset(self):
        """the queryset of items in the collection; models must supply it"""
        raise NotImplementedError("Model must define collection_queryset")

    def to_activity_dataclass(self, **kwargs):
        """the collection (or a page of it) as an activity dataclass"""
        items = self.collection_queryset
        return self.to_ordered_collection(items, **kwargs)

    def to_activity(self, **kwargs):
        """the collection (or a page of it) as serialized json"""
        items = self.collection_queryset
        collection = self.to_ordered_collection(items, **kwargs)
        return collection.serialize()

    def delete(self, *args, broadcast=True, **kwargs):
        """delete the object, then announce the deletion for local owners"""
        # build the activity first, while the object still exists
        delete_activity = self.to_delete_activity(self.user)
        super().delete(*args, **kwargs)
        if not (self.user.local and broadcast):
            return
        self.broadcast(delete_activity, self.user)
class CollectionItemMixin(ActivityPubMixin):
    """for items that are part of an (Ordered)Collection

    Reads ``self.collection_field`` (the name of the attribute holding the
    parent collection), ``self.approved`` and ``self.user``, none of which are
    defined in this class.
    """

    activity_serializer = activitypub.CollectionItem

    def broadcast(self, activity, sender, software="bookwyrm", queue=MEDIUM):
        """only send book collection updates to other bookwyrm instances"""
        super().broadcast(activity, sender, software=software, queue=queue)

    @property
    def privacy(self):
        """inherit the privacy of the list, or direct if pending"""
        collection_field = getattr(self, self.collection_field)
        if self.approved:
            return collection_field.privacy
        return "direct"

    @property
    def recipients(self):
        """the owner of the list is a direct recipient"""
        collection_field = getattr(self, self.collection_field)
        if collection_field.user.local:
            # don't broadcast to yourself
            return []
        return [collection_field.user]

    def save(self, *args, broadcast=True, priority=MEDIUM, **kwargs):
        """broadcast updated"""
        # first off, we want to save normally no matter what
        super().save(*args, **kwargs)

        # list items can be updated, normally you would only broadcast on
        # created
        if not broadcast or not self.user.local:
            return

        # adding an obj to the collection
        activity = self.to_add_activity(self.user)
        self.broadcast(activity, self.user, queue=priority)

    def delete(self, *args, broadcast=True, **kwargs):
        """broadcast a remove activity"""
        # serialize before deleting, while the item still exists
        activity = self.to_remove_activity(self.user)
        super().delete(*args, **kwargs)
        if self.user.local and broadcast:
            self.broadcast(activity, self.user)

    def to_add_activity(self, user):
        """AP for shelving a book"""
        collection_field = getattr(self, self.collection_field)
        return activitypub.Add(
            id=f"{collection_field.remote_id}#add",
            actor=user.remote_id,
            object=self.to_activity_dataclass(),
            target=collection_field.remote_id,
        ).serialize()

    def to_remove_activity(self, user):
        """AP for un-shelving a book"""
        collection_field = getattr(self, self.collection_field)
        return activitypub.Remove(
            id=f"{collection_field.remote_id}#remove",
            actor=user.remote_id,
            object=self.to_activity_dataclass(),
            target=collection_field.remote_id,
        ).serialize()
class ActivityMixin(ActivityPubMixin):
    """add this mixin for models that are AP serializable"""

    def _acting_user(self):
        """the user behind this activity: ``user``, else ``user_subject``"""
        return self.user if hasattr(self, "user") else self.user_subject

    def save(self, *args, broadcast=True, priority=MEDIUM, **kwargs):
        """broadcast activity"""
        super().save(*args, **kwargs)
        user = self._acting_user()
        if broadcast and user.local:
            self.broadcast(self.to_activity(), user, queue=priority)

    def delete(self, *args, broadcast=True, **kwargs):
        """nevermind, undo that activity"""
        user = self._acting_user()
        # the undo is sent before the row is deleted
        if broadcast and user.local:
            self.broadcast(self.to_undo_activity(), user)
        super().delete(*args, **kwargs)

    def to_undo_activity(self):
        """undo an action"""
        user = self._acting_user()
        return activitypub.Undo(
            id=f"{self.remote_id}#undo",
            actor=user.remote_id,
            object=self,
        ).serialize()
def generate_activity(obj):
    """build the activity dict for an object from its serializable fields

    Every entry of ``obj.activity_fields`` writes itself into the dict, then
    any ``serialize_reverse_fields`` are unfurled, and finally a missing or
    empty ``"id"`` is filled in from ``obj.get_remote_id()``.
    """
    result = {}
    for serializable in obj.activity_fields:
        serializable.set_activity_from_field(result, obj)

    # reverse relations, for example the editions of a work
    reverse_fields = (
        obj.serialize_reverse_fields
        if hasattr(obj, "serialize_reverse_fields")
        else ()
    )
    for model_name, activity_name, sort_by in reverse_fields:
        related = getattr(obj, model_name)
        result[activity_name] = unfurl_related_field(
            related, sort_field=sort_by
        )

    if not result.get("id"):
        result["id"] = obj.get_remote_id()
    return result
def unfurl_related_field(related_field, sort_field=None):
    """serialize a reverse lookup (like a public key owner or a Status
    attachment)

    With a ``sort_field`` and a manager-like object (one that has ``all``),
    every item is unfurled in sorted order. Otherwise the result is either the
    embedded activity (when ``reverse_unfurl`` is set) or just the remote id.
    """
    if sort_field and hasattr(related_field, "all"):
        ordered_items = related_field.order_by(sort_field).all()
        return [unfurl_related_field(item) for item in ordered_items]

    if not related_field.reverse_unfurl:
        return related_field.remote_id

    # one-to-one (key pair) objects expose field_to_activity;
    # one-to-many (attachments) fall back to to_activity
    if hasattr(related_field, "field_to_activity"):
        return related_field.field_to_activity()
    return related_field.to_activity()
@app.task(queue=MEDIUM)
def broadcast_task(sender_id, activity, recipients):
    """the celery task for broadcast

    Signs ``activity`` as the user with id ``sender_id`` and posts it to each
    url in ``recipients``. Delivery is best-effort: a failed request is logged
    and the remaining recipients are still attempted.
    """
    user_model = apps.get_model("bookwyrm.User", require_ready=True)
    sender = user_model.objects.get(id=sender_id)
    for recipient in recipients:
        try:
            sign_and_send(sender, activity, recipient)
        except RequestException as err:
            # keep going, but leave a trace instead of failing silently
            logger.warning(
                "Failed to deliver activity to %s: %s", recipient, err
            )
def sign_and_send(sender, data, destination, timeout=15):
    """sign an activity as ``sender`` and POST it to ``destination``

    ``data`` is the serialized activity body. ``timeout`` (seconds) is passed
    to ``requests.post`` so an unresponsive server cannot block the caller
    indefinitely. Returns the response.

    Raises ValueError when the sender has no private key, and a
    ``requests`` exception on connection problems, timeouts or a non-2xx/3xx
    response.
    """
    if not sender.key_pair.private_key:
        # this shouldn't happen. it would be bad if it happened.
        raise ValueError("No private key found for sender")

    now = http_date()
    digest = make_digest(data)

    response = requests.post(
        destination,
        data=data,
        headers={
            "Date": now,
            "Digest": digest,
            "Signature": make_signature(sender, destination, now, digest),
            "Content-Type": "application/activity+json; charset=utf-8",
            "User-Agent": USER_AGENT,
        },
        timeout=timeout,
    )
    # no-op for successful responses, raises HTTPError otherwise
    response.raise_for_status()
    return response
# pylint: disable=unused-argument
def to_ordered_collection_page(
    queryset, remote_id, id_only=False, page=1, pure=False, **kwargs
):
    """serialize and paginate a queryset

    Returns one ``OrderedCollectionPage`` of ``PAGE_LENGTH`` items. Items are
    remote ids when ``id_only`` is set, otherwise each item's ``to_activity``
    output (``pure`` is forwarded to it). ``next``/``prev`` are only set when
    such a page exists. Extra keyword arguments are ignored.
    """
    paginated = Paginator(queryset, PAGE_LENGTH)

    activity_page = paginated.get_page(page)
    if id_only:
        items = [s.remote_id for s in activity_page.object_list]
    else:
        items = [s.to_activity(pure=pure) for s in activity_page.object_list]

    prev_page = next_page = None
    if activity_page.has_next():
        next_page = f"{remote_id}?page={activity_page.next_page_number()}"
    if activity_page.has_previous():
        prev_page = f"{remote_id}?page={activity_page.previous_page_number()}"
    return activitypub.OrderedCollectionPage(
        id=f"{remote_id}?page={page}",
        partOf=remote_id,
        orderedItems=items,
        next=next_page,
        prev=prev_page,
    )
# allowed values for FederatedServer.status, with translatable labels
FederationStatus = [
    ("federated", _("Federated")),
    ("blocked", _("Blocked")),
]


class FederatedServer(BookWyrmModel):
    """a remote server, and whether we federate with it or block it"""

    server_name = models.CharField(max_length=255, unique=True)
    status = models.CharField(
        max_length=255, default="federated", choices=FederationStatus
    )
    # is it mastodon, bookwyrm, etc
    application_type = models.CharField(max_length=255, null=True, blank=True)
    application_version = models.CharField(
        max_length=255, null=True, blank=True
    )
    notes = models.TextField(null=True, blank=True)

    def block(self):
        """mark the server blocked and deactivate what belongs to it"""
        self.status = "blocked"
        self.save(update_fields=["status"])

        # every currently active user of this server gets deactivated
        active_users = self.user_set.filter(is_active=True)
        active_users.update(
            is_active=False, deactivation_reason="domain_block"
        )

        # only bookwyrm servers are checked for related connectors
        if self.application_type != "bookwyrm":
            return
        connector_model = apps.get_model(
            "bookwyrm.Connector", require_ready=True
        )
        active_connectors = connector_model.objects.filter(
            identifier=self.server_name, active=True
        )
        active_connectors.update(
            active=False, deactivation_reason="domain_block"
        )

    def unblock(self):
        """federate again and re-activate what a domain block turned off"""
        self.status = "federated"
        self.save(update_fields=["status"])

        # only users deactivated because of a domain block are restored
        blocked_users = self.user_set.filter(
            deactivation_reason="domain_block"
        )
        blocked_users.update(is_active=True, deactivation_reason=None)

        # only bookwyrm servers are checked for related connectors
        if self.application_type != "bookwyrm":
            return
        connector_model = apps.get_model(
            "bookwyrm.Connector", require_ready=True
        )
        blocked_connectors = connector_model.objects.filter(
            identifier=self.server_name,
            active=False,
            deactivation_reason="domain_block",
        )
        blocked_connectors.update(active=True, deactivation_reason=None)

    @classmethod
    def is_blocked(cls, url):
        """whether the network location of ``url`` is a blocked server"""
        domain = urlparse(url).netloc
        blocked = cls.objects.filter(server_name=domain, status="blocked")
        return blocked.exists()
|