summaryrefslogtreecommitdiff
path: root/plugins/tipue-search/tipue_search.py
diff options
context:
space:
mode:
authorsrv <enmanuel.saravia.externo@pandero.com.pe>2025-04-28 17:11:28 -0500
committersrv <enmanuel.saravia.externo@pandero.com.pe>2025-04-28 17:11:28 -0500
commitf35a7b0e70032de2feec9f3bda09da44cf0e1073 (patch)
tree1e0e09581dd3707d0ceb93346452dd14451a8423 /plugins/tipue-search/tipue_search.py
first commit
Diffstat (limited to 'plugins/tipue-search/tipue_search.py')
-rw-r--r--plugins/tipue-search/tipue_search.py212
1 files changed, 212 insertions, 0 deletions
diff --git a/plugins/tipue-search/tipue_search.py b/plugins/tipue-search/tipue_search.py
new file mode 100644
index 0000000..19ef68f
--- /dev/null
+++ b/plugins/tipue-search/tipue_search.py
@@ -0,0 +1,212 @@
+# -*- coding: utf-8 -*-
+"""
+Tipue Search
+============
+
+A Pelican plugin to serialize generated HTML to JSON
+that can be used by jQuery plugin - Tipue Search.
+
+Copyright (c) Talha Mansoor
+"""
+
+from __future__ import unicode_literals
+
+import os.path
+import json
+import re
+from bs4 import BeautifulSoup
+from codecs import open
+try:
+ from urlparse import urljoin
+except ImportError:
+ from urllib.parse import urljoin
+
+from pelican import signals
+
+
class Tipue_Search_JSON_Generator(object):
    """Serialize generated article/page HTML into a JSON index.

    The resulting ``tipuesearch_content.json`` is consumed by the
    Tipue Search jQuery plugin.  One node is produced per published
    article (video-style metadata) and one per TEMPLATE_PAGES entry.
    """

    def __init__(self, context, settings, path, theme, output_path, *null):
        """Capture the Pelican settings this generator needs.

        Extra positional arguments Pelican may pass are absorbed by
        ``*null`` and ignored.
        """
        self.output_path = output_path
        self.context = context
        self.siteurl = settings.get('SITEURL')
        self.relative_urls = settings.get('RELATIVE_URLS')
        # TEMPLATE_PAGES may be unset (None); fall back to an empty
        # mapping so generate_output can iterate it unconditionally.
        self.tpages = settings.get('TEMPLATE_PAGES') or {}
        self.tstatic = settings.get('THEME_STATIC_DIR')
        self.json_nodes = []

    def normalize(self, s):
        """Return a slug-like form of *s* for author URLs.

        Lower-cases the string, strips common Spanish accents and
        dots, and hyphenates single spaces between letters.
        """
        replacements = (
            ("á", "a"),
            ("é", "e"),
            ("í", "i"),
            ("ó", "o"),
            ("ú", "u"),
            (".", ""),
        )
        s = s.lower()
        # The string is already lower-case and every source pattern is
        # lower-case, so one replace per pair suffices.
        for accented, plain in replacements:
            s = s.replace(accented, plain)

        s = re.sub(r"([a-z]) ([a-z])", r"\1-\2", s, 0,
                   re.IGNORECASE | re.DOTALL)
        return s

    def create_json_node(self, article):
        """Append a JSON node describing *article* to ``self.json_nodes``.

        Drafts (anything whose status is not 'published') are skipped.
        """
        if getattr(article, 'status', 'published') != 'published':
            return

        soup_title = BeautifulSoup(
            article.title.replace('&nbsp;', ' '), 'html.parser')
        video_title = soup_title.get_text(' ', strip=True).replace(
            '“', '"').replace(
            '”', '"').replace(
            '’', "'").replace('^', '&#94;')

        # description
        art_desc = BeautifulSoup(article.content, 'html.parser')

        # Skip any <h1> that lives inside a leading <figure>: keep only
        # the <p> elements that follow the figure.  Articles without a
        # <figure> raise AttributeError (find() returns None), in which
        # case the whole body is used as-is.
        try:
            art_desc = art_desc.find('figure').find_all_next('p')
            art_desc_html = ''.join(map(str, art_desc))
            art_desc = BeautifulSoup(art_desc_html, 'html.parser')
            video_desc_html = art_desc_html.replace('\n', '&#32;')
        except AttributeError:
            video_desc_html = ''.join(
                map(str, art_desc)).replace('\n', '&#32;')

        video_desc_text = art_desc.get_text(' ', strip=True).replace(
            '“', '"').replace(
            '”', '"').replace(
            '’', "'").replace(
            '¶', ' ').replace('^', '&#94;')

        # Collapse any run of whitespace to a single space.
        video_desc_text = ' '.join(video_desc_text.split())

        # base url
        if self.relative_urls:
            base_url = '.'
        else:
            base_url = self.siteurl

        # videoid
        video_id = str(article.videoid) if getattr(
            article, 'videoid', 'None') != 'None' else ''

        # thumbnail
        video_image = article.image if getattr(
            article, 'image', 'None') != 'None' else ''

        url_image = "%s/%s/../wp-content/uploads/article/thumbnail/%s" % (
            base_url, self.tstatic, video_image
        )

        # publish (ISO timestamp)
        video_publish = article.date.isoformat() if getattr(
            article, 'date', 'None') != 'None' else ''

        # publish_text (human-readable date)
        video_publish_text = article.date.strftime("%a, %d %B, %Y") if getattr(
            article, 'date', 'None') != 'None' else ''

        # author
        video_author = str(article.author) if getattr(
            article, 'author', 'None') != 'None' else ''

        # author url
        video_author_url = "%s/author/%s/" % (
            base_url, self.normalize(video_author)
        )

        # time
        video_time = article.time if getattr(
            article, 'time', 'None') != 'None' else ''

        video_url = '.'
        if article.url:
            video_url = article.url if self.relative_urls else (
                self.siteurl + '/' + article.url)

        video_src = article.og_video if getattr(
            article, 'og_video', 'None') != 'None' else ''

        # category
        video_category = article.category.name if getattr(
            article, 'category', 'None') != 'None' else ''

        # tags — guard like the other optional attributes so an article
        # without tags does not raise AttributeError.
        data_tags = ['%s' % (tag) for tag in getattr(article, 'tags', [])]
        video_tags = dict((num, tag) for num, tag in enumerate(data_tags))

        node = {
            'videoId': video_id,
            'title': video_title,
            'description': video_desc_text,
            'descriptionHtml': video_desc_html,
            'videoThumbnail': url_image,
            'formatStreams': {
                'url': video_src,
            },
            'author': video_author,
            'authorUrl': video_author_url,
            'published': video_publish,
            'publishedText': video_publish_text,
            'time': video_time,
            'category': video_category,
            'keywords': video_tags,
            'url': video_url
        }

        self.json_nodes.append(node)

    def create_tpage_node(self, srclink):
        """Append a JSON node for the template page keyed by *srclink*."""
        # Use a context manager so the file handle is closed even if
        # parsing fails (the original leaked it).
        with open(os.path.join(self.output_path,
                               self.tpages[srclink]),
                  encoding='utf-8') as srcfile:
            soup = BeautifulSoup(srcfile, 'html.parser')
        video_title = soup.title.string if soup.title is not None else ''
        video_text = soup.get_text()

        # Should set default category
        video_category = ''
        video_url = urljoin(self.siteurl, self.tpages[srclink])

        node = {'title': video_title,
                'text': video_text,
                'tags': video_category,
                'url': video_url}

        self.json_nodes.append(node)

    def generate_output(self, writer):
        """Write ``tipuesearch_content.json`` into the output directory."""
        path = os.path.join(self.output_path, 'tipuesearch_content.json')

        # Work on a copy: the original extended the shared
        # context['articles'] list in place while iterating it, which
        # both mutates Pelican's context and skips/duplicates items.
        articles = list(self.context['articles'])
        for article in self.context['articles']:
            articles += article.translations

        for srclink in self.tpages:
            self.create_tpage_node(srclink)

        for article in articles:
            self.create_json_node(article)

        root_node = {'videos': self.json_nodes}

        with open(path, 'w', encoding='utf-8') as fd:
            json.dump(root_node, fd, separators=(',', ':'),
                      ensure_ascii=False)
+
+
def get_generators(generators):
    """Pelican `get_generators` hook: hand back the search JSON generator.

    The *generators* argument supplied by the signal is unused; Pelican
    only needs the generator class returned.
    """
    generator_cls = Tipue_Search_JSON_Generator
    return generator_cls
+
+
def register():
    """Plugin entry point: connect this plugin to Pelican's
    ``get_generators`` signal so the JSON index is built on each run."""
    signals.get_generators.connect(get_generators)