| author | srv <enmanuel.saravia.externo@pandero.com.pe> | 2025-04-28 17:11:28 -0500 |
|---|---|---|
| committer | srv <enmanuel.saravia.externo@pandero.com.pe> | 2025-04-28 17:11:28 -0500 |
| commit | f35a7b0e70032de2feec9f3bda09da44cf0e1073 (patch) | |
| tree | 1e0e09581dd3707d0ceb93346452dd14451a8423 | /plugins/tipue-search/tipue_search.py |
first commit
Diffstat (limited to 'plugins/tipue-search/tipue_search.py')
-rw-r--r--  plugins/tipue-search/tipue_search.py  212
1 file changed, 212 insertions, 0 deletions
diff --git a/plugins/tipue-search/tipue_search.py b/plugins/tipue-search/tipue_search.py
new file mode 100644
index 0000000..19ef68f
--- /dev/null
+++ b/plugins/tipue-search/tipue_search.py
@@ -0,0 +1,212 @@
+# -*- coding: utf-8 -*-
+"""
+Tipue Search
+============
+
+A Pelican plugin to serialize generated HTML to JSON
+that can be used by the jQuery plugin Tipue Search.
+
+Copyright (c) Talha Mansoor
+"""
+
+from __future__ import unicode_literals
+
+import os.path
+import json
+import re
+from bs4 import BeautifulSoup
+from codecs import open
+try:
+    from urlparse import urljoin
+except ImportError:
+    from urllib.parse import urljoin
+
+from pelican import signals
+
+
+class Tipue_Search_JSON_Generator(object):
+
+    def __init__(self, context, settings, path, theme, output_path, *null):
+        self.output_path = output_path
+        self.context = context
+        self.siteurl = settings.get('SITEURL')
+        self.relative_urls = settings.get('RELATIVE_URLS')
+        self.tpages = settings.get('TEMPLATE_PAGES')
+        self.tstatic = settings.get('THEME_STATIC_DIR')
+        self.json_nodes = []
+
+    def normalize(self, s):
+        # Strip Spanish accents and dots, then hyphenate the spaces
+        # between words to build a URL slug.
+        replacements = (
+            ("á", "a"),
+            ("é", "e"),
+            ("í", "i"),
+            ("ó", "o"),
+            ("ú", "u"),
+            (".", ""),
+        )
+        s = s.lower()
+        for a, b in replacements:
+            s = s.replace(a, b)
+
+        s = re.sub(r"([a-z]) ([a-z])", r"\1-\2", s, 0,
+                   re.IGNORECASE | re.DOTALL)
+        return s
+
+    def create_json_node(self, article):
+        if getattr(article, 'status', 'published') != 'published':
+            return
+
+        soup_title = BeautifulSoup(
+            article.title.replace('&nbsp;', ' '), 'html.parser')
+        video_title = soup_title.get_text(' ', strip=True).replace(
+            '“', '"').replace(
+            '”', '"').replace(
+            '’', "'").replace('^', '&#94;')
+
+        # description
+        art_desc = BeautifulSoup(article.content, 'html.parser')
+
+        # Ignore a leading <h1> inside <figure>: when the content opens
+        # with a figure, keep only the <p> elements that follow it.
+        try:
+            art_desc = art_desc.find('figure').find_all_next('p')
+            art_desc_html = ''.join(map(str, art_desc))
+            art_desc = BeautifulSoup(art_desc_html, 'html.parser')
+            video_desc_html = art_desc_html.replace('\n', ' ')
+        except AttributeError:
+            # No <figure> in the content; fall back to the whole body.
+            video_desc_html = ''.join(map(str, art_desc)).replace('\n', ' ')
+
+        video_desc_text = art_desc.get_text(' ', strip=True).replace(
+            '“', '"').replace(
+            '”', '"').replace(
+            '’', "'").replace(
+            '¶', ' ').replace('^', '&#94;')
+
+        video_desc_text = ' '.join(video_desc_text.split())
+
+        # base url
+        if self.relative_urls:
+            base_url = '.'
+        else:
+            base_url = self.siteurl
+
+        # videoid
+        video_id = str(article.videoid) if getattr(
+            article, 'videoid', 'None') != 'None' else ''
+
+        # thumbnail
+        video_image = article.image if getattr(
+            article, 'image', 'None') != 'None' else ''
+
+        url_image = "%s/%s/../wp-content/uploads/article/thumbnail/%s" % (
+            base_url, self.tstatic, video_image
+        )
+
+        # publish
+        video_publish = article.date.isoformat() if getattr(
+            article, 'date', 'None') != 'None' else ''
+
+        # publish_text
+        video_publish_text = article.date.strftime("%a, %d %B, %Y") if getattr(
+            article, 'date', 'None') != 'None' else ''
+
+        # author
+        video_author = str(article.author) if getattr(
+            article, 'author', 'None') != 'None' else ''
+
+        # author url
+        video_author_url = "%s/author/%s/" % (
+            base_url, self.normalize(video_author)
+        )
+
+        # time
+        video_time = article.time if getattr(
+            article, 'time', 'None') != 'None' else ''
+
+        video_url = '.'
+        if article.url:
+            video_url = article.url if self.relative_urls else (
+                self.siteurl + '/' + article.url)
+
+        video_src = article.og_video if getattr(
+            article, 'og_video', 'None') != 'None' else ''
+
+        # category
+        video_category = article.category.name if getattr(
+            article, 'category', 'None') != 'None' else ''
+
+        # tags (articles without tags have no 'tags' attribute)
+        data_tags = ['%s' % tag for tag in getattr(article, 'tags', [])]
+        video_tags = dict((num, tag) for num, tag in enumerate(data_tags))
+
+        node = {
+            'videoId': video_id,
+            'title': video_title,
+            'description': video_desc_text,
+            'descriptionHtml': video_desc_html,
+            'videoThumbnail': url_image,
+            'formatStreams': {
+                'url': video_src,
+            },
+            'author': video_author,
+            'authorUrl': video_author_url,
+            'published': video_publish,
+            'publishedText': video_publish_text,
+            'time': video_time,
+            'category': video_category,
+            'keywords': video_tags,
+            'url': video_url
+        }
+
+        self.json_nodes.append(node)
+
+    def create_tpage_node(self, srclink):
+        with open(os.path.join(self.output_path, self.tpages[srclink]),
+                  encoding='utf-8') as srcfile:
+            soup = BeautifulSoup(srcfile, 'html.parser')
+        video_title = soup.title.string if soup.title is not None else ''
+        video_text = soup.get_text()
+
+        # Template pages carry no category; use an empty default.
+        video_category = ''
+        video_url = urljoin(self.siteurl, self.tpages[srclink])
+
+        node = {'title': video_title,
+                'text': video_text,
+                'tags': video_category,
+                'url': video_url}
+
+        self.json_nodes.append(node)
+
+    def generate_output(self, writer):
+        path = os.path.join(self.output_path, 'tipuesearch_content.json')
+
+        # Copy the context list so that appending translations below does
+        # not mutate the list while it is being iterated.
+        articles = list(self.context['articles'])
+
+        for article in self.context['articles']:
+            articles += article.translations
+
+        for srclink in self.tpages:
+            self.create_tpage_node(srclink)
+
+        for article in articles:
+            self.create_json_node(article)
+
+        root_node = {'videos': self.json_nodes}
+
+        with open(path, 'w', encoding='utf-8') as fd:
+            json.dump(root_node, fd, separators=(',', ':'), ensure_ascii=False)
+
+
+def get_generators(generators):
+    return Tipue_Search_JSON_Generator
+
+
+def register():
+    signals.get_generators.connect(get_generators)
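To try the generator, the plugin directory has to be on Pelican's plugin path and the module enabled by name. The fragment below is a minimal, hypothetical `pelicanconf.py` sketch, not part of this commit; `PLUGIN_PATHS`, `PLUGINS`, `SITEURL`, `RELATIVE_URLS`, `THEME_STATIC_DIR`, and `TEMPLATE_PAGES` are the standard Pelican settings the generator reads in `__init__`, and the concrete values are placeholders.

```python
# pelicanconf.py -- hypothetical values, adjust to your site layout
PLUGIN_PATHS = ['plugins/tipue-search']  # directory holding tipue_search.py
PLUGINS = ['tipue_search']               # module name, imported by Pelican

SITEURL = 'https://example.com'  # base for absolute node URLs
RELATIVE_URLS = False            # True makes node URLs start with '.'
THEME_STATIC_DIR = 'theme'       # spliced into the thumbnail URL
TEMPLATE_PAGES = {               # each rendered page becomes a text node
    'pages/about.html': 'about.html',
}
```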
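For reference, each article node written to `tipuesearch_content.json` follows the `node` dict built in `create_json_node`; the sample below only illustrates the shape, with made-up values rather than data from this repository:

```json
{"videos": [{
  "videoId": "42",
  "title": "Sample title",
  "description": "Plain-text description",
  "descriptionHtml": "<p>Plain-text description</p>",
  "videoThumbnail": "https://example.com/theme/../wp-content/uploads/article/thumbnail/img.jpg",
  "formatStreams": {"url": "https://example.com/videos/sample.mp4"},
  "author": "Jane Doe",
  "authorUrl": "https://example.com/author/jane-doe/",
  "published": "2025-04-28T17:11:28-05:00",
  "publishedText": "Mon, 28 April, 2025",
  "time": "12:34",
  "category": "misc",
  "keywords": {"0": "pelican", "1": "search"},
  "url": "https://example.com/sample-title.html"
}]}
```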