models.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656
  1. from base64 import b64encode
  2. from collections import namedtuple
  3. from functools import reduce
  4. import json
  5. import operator
  6. import logging
  7. from uuid import uuid4
  8. import requests
  9. from requests.exceptions import RequestException
  10. from Crypto.PublicKey import RSA
  11. from Crypto.Signature import pkcs1_15
  12. from Crypto.Hash import SHA256
  13. from django.apps import apps
  14. from django.core.paginator import Paginator
  15. from django.db.models import Q
  16. from django.utils.http import http_date
  17. from urllib.parse import urlparse
  18. from django.apps import apps
  19. from django.db import models
  20. from django.utils.translation import gettext_lazy as _
  21. from bookwyrm import activitypub
  22. from bookwyrm.settings import USER_AGENT, PAGE_LENGTH
  23. from bookwyrm.signatures import make_signature, make_digest
  24. from bookwyrm.tasks import app, MEDIUM
  25. from bookwyrm.models.fields import ImageField, ManyToManyField
  26. logger = logging.getLogger(__name__)
  27. PropertyField = namedtuple("PropertyField", ("set_activity_from_field"))
  28. # pylint: disable=invalid-name
  29. def set_activity_from_property_field(activity, obj, field):
  30. """assign a model property value to the activity json"""
  31. activity[field[1]] = getattr(obj, field[0])
  32. class ActivityPubMixin:
  33. """A model mixin which allows serialization for the ActivityPub standard
  34. Largely derived from code from Bookwyrm"""
  35. activity_serializer = lambda: {}
  36. reverse_unfurl = False
  37. def __init__(self, *args, **kwargs):
  38. """collect some info on model fields for later use"""
  39. self.image_fields = []
  40. self.many_to_many_fields = []
  41. self.simple_fields = [] # "simple"
  42. # sort model fields by type
  43. for field in self._meta.get_fields():
  44. if not hasattr(field, "field_to_activity"):
  45. continue
  46. if isinstance(field, ImageField):
  47. self.image_fields.append(field)
  48. elif isinstance(field, ManyToManyField):
  49. self.many_to_many_fields.append(field)
  50. else:
  51. self.simple_fields.append(field)
  52. # a list of allll the serializable fields
  53. self.activity_fields = (
  54. self.image_fields + self.many_to_many_fields + self.simple_fields
  55. )
  56. if hasattr(self, "property_fields"):
  57. self.activity_fields += [
  58. # pylint: disable=cell-var-from-loop
  59. PropertyField(
  60. lambda a, o: set_activity_from_property_field(a, o, f)
  61. )
  62. for f in self.property_fields
  63. ]
  64. # these are separate to avoid infinite recursion issues
  65. self.deserialize_reverse_fields = (
  66. self.deserialize_reverse_fields
  67. if hasattr(self, "deserialize_reverse_fields")
  68. else []
  69. )
  70. self.serialize_reverse_fields = (
  71. self.serialize_reverse_fields
  72. if hasattr(self, "serialize_reverse_fields")
  73. else []
  74. )
  75. super().__init__(*args, **kwargs)
  76. @classmethod
  77. def find_existing_by_remote_id(cls, remote_id):
  78. """look up a remote id in the db"""
  79. return cls.find_existing({"id": remote_id})
  80. @classmethod
  81. def find_existing(cls, remote_id=None, data):
  82. """Looks for existing activities"""
  83. """compare data to fields that can be used for deduplation.
  84. This always includes remote_id, but can also be unique identifiers
  85. like an isbn for an edition"""
  86. filters = []
  87. # grabs all the data from the model to create django queryset filters
  88. for field in cls._meta.get_fields():
  89. if (
  90. not hasattr(field, "deduplication_field")
  91. or not field.deduplication_field
  92. ):
  93. continue
  94. value = data.get(field.get_activitypub_field())
  95. if not value:
  96. continue
  97. filters.append({field.name: value})
  98. if hasattr(cls, "origin_id") and "id" in data:
  99. # kinda janky, but this handles special case for books
  100. filters.append({"origin_id": data["id"]})
  101. if not filters:
  102. # if there are no deduplication fields, it will match the first
  103. # item no matter what. this shouldn't happen but just in case.
  104. return None
  105. objects = cls.objects
  106. if hasattr(objects, "select_subclasses"):
  107. objects = objects.select_subclasses()
  108. # an OR operation on all the match fields, sorry for the dense syntax
  109. match = objects.filter(reduce(operator.or_, (Q(**f) for f in filters)))
  110. # there OUGHT to be only one match
  111. return match.first()
  112. def broadcast(self, activity, sender, software=None, queue=MEDIUM):
  113. """Broadast an activity via an asyncronous task"""
  114. broadcast_task.apply_async(
  115. args=(
  116. sender.id,
  117. json.dumps(activity, cls=activitypub.ActivityEncoder),
  118. self.get_recipients(software=software),
  119. ),
  120. queue=queue,
  121. )
  122. def get_recipients(self, software=None):
  123. """figure out which inbox urls to post to"""
  124. # first we have to figure out who should receive this activity
  125. privacy = self.privacy if hasattr(self, "privacy") else "public"
  126. # is this activity owned by a user (statuses, lists, shelves), or is it
  127. # general to the instance (like books)
  128. user = self.user if hasattr(self, "user") else None
  129. user_model = apps.get_model("bookwyrm.User", require_ready=True)
  130. if not user and isinstance(self, user_model):
  131. # or maybe the thing itself is a user
  132. user = self
  133. # find anyone who's tagged in a status, for example
  134. mentions = self.recipients if hasattr(self, "recipients") else []
  135. # we always send activities to explicitly mentioned users' inboxes
  136. recipients = [u.inbox for u in mentions or [] if not u.local]
  137. # unless it's a dm, all the followers should receive the activity
  138. if privacy != "direct":
  139. # we will send this out to a subset of all remote users
  140. queryset = (
  141. user_model.viewer_aware_objects(user)
  142. .filter(
  143. local=False,
  144. )
  145. .distinct()
  146. )
  147. # filter users first by whether they're using the desired software
  148. # this lets us send book updates only to other bw servers
  149. if software:
  150. queryset = queryset.filter(
  151. bookwyrm_user=(software == "bookwyrm")
  152. )
  153. # if there's a user, we only want to send to the user's followers
  154. if user:
  155. queryset = queryset.filter(following=user)
  156. # ideally, we will send to shared inboxes for efficiency
  157. shared_inboxes = (
  158. queryset.filter(shared_inbox__isnull=False)
  159. .values_list("shared_inbox", flat=True)
  160. .distinct()
  161. )
  162. # but not everyone has a shared inbox
  163. inboxes = queryset.filter(shared_inbox__isnull=True).values_list(
  164. "inbox", flat=True
  165. )
  166. recipients += list(shared_inboxes) + list(inboxes)
  167. return list(set(recipients))
  168. def to_activity_dataclass(self):
  169. """convert from a model to an activity"""
  170. activity = generate_activity(self)
  171. return self.activity_serializer(**activity)
  172. def to_activity(self, **kwargs): # pylint: disable=unused-argument
  173. """convert from a model to a json activity"""
  174. return self.to_activity_dataclass().serialize()
  175. class ObjectMixin(ActivitypubMixin):
  176. """add this mixin for object models that are AP serializable"""
  177. def save(
  178. self, *args, created=None, software=None, priority=MEDIUM, **kwargs
  179. ):
  180. """broadcast created/updated/deleted objects as appropriate"""
  181. broadcast = kwargs.get("broadcast", True)
  182. # this bonus kwarg would cause an error in the base save method
  183. if "broadcast" in kwargs:
  184. del kwargs["broadcast"]
  185. created = created or not bool(self.id)
  186. # first off, we want to save normally no matter what
  187. super().save(*args, **kwargs)
  188. if not broadcast or (
  189. hasattr(self, "status_type") and self.status_type == "Announce"
  190. ):
  191. return
  192. # this will work for objects owned by a user (lists, shelves)
  193. user = self.user if hasattr(self, "user") else None
  194. if created:
  195. # broadcast Create activities for objects owned by a local user
  196. if not user or not user.local:
  197. return
  198. try:
  199. # do we have a "pure" activitypub version of this for mastodon?
  200. if software != "bookwyrm" and hasattr(self, "pure_content"):
  201. pure_activity = self.to_create_activity(user, pure=True)
  202. self.broadcast(
  203. pure_activity, user, software="other", queue=priority
  204. )
  205. # set bookwyrm so that that type is also sent
  206. software = "bookwyrm"
  207. # sends to BW only if we just did a pure version for masto
  208. activity = self.to_create_activity(user)
  209. self.broadcast(
  210. activity, user, software=software, queue=priority
  211. )
  212. except AttributeError:
  213. # janky as heck, this catches the mutliple inheritence chain
  214. # for boosts and ignores this auxilliary broadcast
  215. return
  216. return
  217. # --- updating an existing object
  218. if not user:
  219. # users don't have associated users, they ARE users
  220. user_model = apps.get_model("bookwyrm.User", require_ready=True)
  221. if isinstance(self, user_model):
  222. user = self
  223. # book data tracks last editor
  224. user = user or getattr(self, "last_edited_by", None)
  225. # again, if we don't know the user or they're remote, don't bother
  226. if not user or not user.local:
  227. return
  228. # is this a deletion?
  229. if hasattr(self, "deleted") and self.deleted:
  230. activity = self.to_delete_activity(user)
  231. else:
  232. activity = self.to_update_activity(user)
  233. self.broadcast(activity, user, queue=priority)
  234. def to_create_activity(self, user, **kwargs):
  235. """returns the object wrapped in a Create activity"""
  236. activity_object = self.to_activity_dataclass(**kwargs)
  237. signature = None
  238. create_id = self.remote_id + "/activity"
  239. if hasattr(activity_object, "content") and activity_object.content:
  240. signer = pkcs1_15.new(RSA.import_key(user.key_pair.private_key))
  241. content = activity_object.content
  242. signed_message = signer.sign(SHA256.new(content.encode("utf8")))
  243. signature = activitypub.Signature(
  244. creator=f"{user.remote_id}#main-key",
  245. created=activity_object.published,
  246. signatureValue=b64encode(signed_message).decode("utf8"),
  247. )
  248. return activitypub.Create(
  249. id=create_id,
  250. actor=user.remote_id,
  251. to=activity_object.to,
  252. cc=activity_object.cc,
  253. object=activity_object,
  254. signature=signature,
  255. ).serialize()
  256. def to_delete_activity(self, user):
  257. """notice of deletion"""
  258. return activitypub.Delete(
  259. id=self.remote_id + "/activity",
  260. actor=user.remote_id,
  261. to=[f"{user.remote_id}/followers"],
  262. cc=["https://www.w3.org/ns/activitystreams#Public"],
  263. object=self,
  264. ).serialize()
  265. def to_update_activity(self, user):
  266. """wrapper for Updates to an activity"""
  267. uuid = uuid4()
  268. return activitypub.Update(
  269. id=f"{self.remote_id}#update/{uuid}",
  270. actor=user.remote_id,
  271. to=["https://www.w3.org/ns/activitystreams#Public"],
  272. object=self,
  273. ).serialize()
  274. class OrderedCollectionPageMixin(ObjectMixin):
  275. """just the paginator utilities, so you don't HAVE to
  276. override ActivitypubMixin's to_activity (ie, for outbox)"""
  277. @property
  278. def collection_remote_id(self):
  279. """this can be overriden if there's a special remote id, ie outbox"""
  280. return self.remote_id
  281. def to_ordered_collection(
  282. self,
  283. queryset,
  284. remote_id=None,
  285. page=False,
  286. collection_only=False,
  287. **kwargs,
  288. ):
  289. """an ordered collection of whatevers"""
  290. if not queryset.ordered:
  291. raise RuntimeError("queryset must be ordered")
  292. remote_id = remote_id or self.remote_id
  293. if page:
  294. if isinstance(page, list) and len(page) > 0:
  295. page = page[0]
  296. return to_ordered_collection_page(
  297. queryset, remote_id, page=page, **kwargs
  298. )
  299. if collection_only or not hasattr(self, "activity_serializer"):
  300. serializer = activitypub.OrderedCollection
  301. activity = {}
  302. else:
  303. serializer = self.activity_serializer
  304. # a dict from the model fields
  305. activity = generate_activity(self)
  306. if remote_id:
  307. activity["id"] = remote_id
  308. paginated = Paginator(queryset, PAGE_LENGTH)
  309. # add computed fields specific to orderd collections
  310. activity["totalItems"] = paginated.count
  311. activity["first"] = f"{remote_id}?page=1"
  312. activity["last"] = f"{remote_id}?page={paginated.num_pages}"
  313. return serializer(**activity)
  314. class OrderedCollectionMixin(OrderedCollectionPageMixin):
  315. """extends activitypub models to work as ordered collections"""
  316. @property
  317. def collection_queryset(self):
  318. """usually an ordered collection model aggregates a different model"""
  319. raise NotImplementedError("Model must define collection_queryset")
  320. activity_serializer = activitypub.OrderedCollection
  321. def to_activity_dataclass(self, **kwargs):
  322. return self.to_ordered_collection(self.collection_queryset, **kwargs)
  323. def to_activity(self, **kwargs):
  324. """an ordered collection of the specified model queryset"""
  325. return self.to_ordered_collection(
  326. self.collection_queryset, **kwargs
  327. ).serialize()
  328. def delete(self, *args, broadcast=True, **kwargs):
  329. """Delete the object"""
  330. activity = self.to_delete_activity(self.user)
  331. super().delete(*args, **kwargs)
  332. if self.user.local and broadcast:
  333. self.broadcast(activity, self.user)
  334. class CollectionItemMixin(ActivitypubMixin):
  335. """for items that are part of an (Ordered)Collection"""
  336. activity_serializer = activitypub.CollectionItem
  337. def broadcast(self, activity, sender, software="bookwyrm", queue=MEDIUM):
  338. """only send book collection updates to other bookwyrm instances"""
  339. super().broadcast(activity, sender, software=software, queue=queue)
  340. @property
  341. def privacy(self):
  342. """inherit the privacy of the list, or direct if pending"""
  343. collection_field = getattr(self, self.collection_field)
  344. if self.approved:
  345. return collection_field.privacy
  346. return "direct"
  347. @property
  348. def recipients(self):
  349. """the owner of the list is a direct recipient"""
  350. collection_field = getattr(self, self.collection_field)
  351. if collection_field.user.local:
  352. # don't broadcast to yourself
  353. return []
  354. return [collection_field.user]
  355. def save(self, *args, broadcast=True, priority=MEDIUM, **kwargs):
  356. """broadcast updated"""
  357. # first off, we want to save normally no matter what
  358. super().save(*args, **kwargs)
  359. # list items can be updateda, normally you would only broadcast on created
  360. if not broadcast or not self.user.local:
  361. return
  362. # adding an obj to the collection
  363. activity = self.to_add_activity(self.user)
  364. self.broadcast(activity, self.user, queue=priority)
  365. def delete(self, *args, broadcast=True, **kwargs):
  366. """broadcast a remove activity"""
  367. activity = self.to_remove_activity(self.user)
  368. super().delete(*args, **kwargs)
  369. if self.user.local and broadcast:
  370. self.broadcast(activity, self.user)
  371. def to_add_activity(self, user):
  372. """AP for shelving a book"""
  373. collection_field = getattr(self, self.collection_field)
  374. return activitypub.Add(
  375. id=f"{collection_field.remote_id}#add",
  376. actor=user.remote_id,
  377. object=self.to_activity_dataclass(),
  378. target=collection_field.remote_id,
  379. ).serialize()
  380. def to_remove_activity(self, user):
  381. """AP for un-shelving a book"""
  382. collection_field = getattr(self, self.collection_field)
  383. return activitypub.Remove(
  384. id=f"{collection_field.remote_id}#remove",
  385. actor=user.remote_id,
  386. object=self.to_activity_dataclass(),
  387. target=collection_field.remote_id,
  388. ).serialize()
  389. class ActivityMixin(ActivitypubMixin):
  390. """add this mixin for models that are AP serializable"""
  391. def save(self, *args, broadcast=True, priority=MEDIUM, **kwargs):
  392. """broadcast activity"""
  393. super().save(*args, **kwargs)
  394. user = self.user if hasattr(self, "user") else self.user_subject
  395. if broadcast and user.local:
  396. self.broadcast(self.to_activity(), user, queue=priority)
  397. def delete(self, *args, broadcast=True, **kwargs):
  398. """nevermind, undo that activity"""
  399. user = self.user if hasattr(self, "user") else self.user_subject
  400. if broadcast and user.local:
  401. self.broadcast(self.to_undo_activity(), user)
  402. super().delete(*args, **kwargs)
  403. def to_undo_activity(self):
  404. """undo an action"""
  405. user = self.user if hasattr(self, "user") else self.user_subject
  406. return activitypub.Undo(
  407. id=f"{self.remote_id}#undo",
  408. actor=user.remote_id,
  409. object=self,
  410. ).serialize()
  411. def generate_activity(obj):
  412. """go through the fields on an object"""
  413. activity = {}
  414. for field in obj.activity_fields:
  415. field.set_activity_from_field(activity, obj)
  416. if hasattr(obj, "serialize_reverse_fields"):
  417. # for example, editions of a work
  418. for (
  419. model_field_name,
  420. activity_field_name,
  421. sort_field,
  422. ) in obj.serialize_reverse_fields:
  423. related_field = getattr(obj, model_field_name)
  424. activity[activity_field_name] = unfurl_related_field(
  425. related_field, sort_field=sort_field
  426. )
  427. if not activity.get("id"):
  428. activity["id"] = obj.get_remote_id()
  429. return activity
  430. def unfurl_related_field(related_field, sort_field=None):
  431. """load reverse lookups (like public key owner or Status attachment"""
  432. if sort_field and hasattr(related_field, "all"):
  433. return [
  434. unfurl_related_field(i)
  435. for i in related_field.order_by(sort_field).all()
  436. ]
  437. if related_field.reverse_unfurl:
  438. # if it's a one-to-one (key pair)
  439. if hasattr(related_field, "field_to_activity"):
  440. return related_field.field_to_activity()
  441. # if it's one-to-many (attachments)
  442. return related_field.to_activity()
  443. return related_field.remote_id
  444. @app.task(queue=MEDIUM)
  445. def broadcast_task(sender_id, activity, recipients):
  446. """the celery task for broadcast"""
  447. user_model = apps.get_model("bookwyrm.User", require_ready=True)
  448. sender = user_model.objects.get(id=sender_id)
  449. for recipient in recipients:
  450. try:
  451. sign_and_send(sender, activity, recipient)
  452. except RequestException:
  453. pass
  454. def sign_and_send(sender, data, destination):
  455. """crpyto whatever and http junk"""
  456. now = http_date()
  457. if not sender.key_pair.private_key:
  458. # this shouldn't happen. it would be bad if it happened.
  459. raise ValueError("No private key found for sender")
  460. digest = make_digest(data)
  461. response = requests.post(
  462. destination,
  463. data=data,
  464. headers={
  465. "Date": now,
  466. "Digest": digest,
  467. "Signature": make_signature(sender, destination, now, digest),
  468. "Content-Type": "application/activity+json; charset=utf-8",
  469. "User-Agent": USER_AGENT,
  470. },
  471. )
  472. if not response.ok:
  473. response.raise_for_status()
  474. return response
  475. # pylint: disable=unused-argument
  476. def to_ordered_collection_page(
  477. queryset, remote_id, id_only=False, page=1, pure=False, **kwargs
  478. ):
  479. """serialize and pagiante a queryset"""
  480. paginated = Paginator(queryset, PAGE_LENGTH)
  481. activity_page = paginated.get_page(page)
  482. if id_only:
  483. items = [s.remote_id for s in activity_page.object_list]
  484. else:
  485. items = [s.to_activity(pure=pure) for s in activity_page.object_list]
  486. prev_page = next_page = None
  487. if activity_page.has_next():
  488. next_page = f"{remote_id}?page={activity_page.next_page_number()}"
  489. if activity_page.has_previous():
  490. prev_page = (
  491. f"{remote_id}?page=%d{activity_page.previous_page_number()}"
  492. )
  493. return activitypub.OrderedCollectionPage(
  494. id=f"{remote_id}?page={page}",
  495. partOf=remote_id,
  496. orderedItems=items,
  497. next=next_page,
  498. prev=prev_page,
  499. )
# (stored value, translatable label) pairs; used as the ``choices`` of
# FederatedServer.status
FederationStatus = [
    ("federated", _("Federated")),
    ("blocked", _("Blocked")),
]
  504. class FederatedServer(BookWyrmModel):
  505. """store which servers we federate with"""
  506. server_name = models.CharField(max_length=255, unique=True)
  507. status = models.CharField(
  508. max_length=255, default="federated", choices=FederationStatus
  509. )
  510. # is it mastodon, bookwyrm, etc
  511. application_type = models.CharField(max_length=255, null=True, blank=True)
  512. application_version = models.CharField(max_length=255, null=True, blank=True)
  513. notes = models.TextField(null=True, blank=True)
  514. def block(self):
  515. """block a server"""
  516. self.status = "blocked"
  517. self.save(update_fields=["status"])
  518. # deactivate all associated users
  519. self.user_set.filter(is_active=True).update(
  520. is_active=False, deactivation_reason="domain_block"
  521. )
  522. # check for related connectors
  523. if self.application_type == "bookwyrm":
  524. connector_model = apps.get_model("bookwyrm.Connector", require_ready=True)
  525. connector_model.objects.filter(
  526. identifier=self.server_name, active=True
  527. ).update(active=False, deactivation_reason="domain_block")
  528. def unblock(self):
  529. """unblock a server"""
  530. self.status = "federated"
  531. self.save(update_fields=["status"])
  532. self.user_set.filter(deactivation_reason="domain_block").update(
  533. is_active=True, deactivation_reason=None
  534. )
  535. # check for related connectors
  536. if self.application_type == "bookwyrm":
  537. connector_model = apps.get_model("bookwyrm.Connector", require_ready=True)
  538. connector_model.objects.filter(
  539. identifier=self.server_name,
  540. active=False,
  541. deactivation_reason="domain_block",
  542. ).update(active=True, deactivation_reason=None)
  543. @classmethod
  544. def is_blocked(cls, url):
  545. """look up if a domain is blocked"""
  546. url = urlparse(url)
  547. domain = url.netloc
  548. return cls.objects.filter(server_name=domain, status="blocked").exists()