Commit 9caf9988 authored by Shubham  .'s avatar Shubham .
Browse files

Add scripts for extracting octaves indices

parent 92753896
This diff is collapsed.
### Steps to extract the indices from Octave's official documentation and add them to the `qhp` file under `<keywords>`
Copy the HTML files named `Concept-Index.html`, `Function-Index.html` and `Operator-Index.html` from `octave-5.2.0/doc/interpreter/octave.html/` into this directory. Then run the Python scripts `concept_index_parser.py`, `function_index_parser.py` and `operator_index_parser.py` one after another using `python <script-name>`. Each script appends its entries to an output file named `index.txt`, so after all three have run the file contains all the indices (delete any existing `index.txt` before re-running, otherwise entries will be duplicated). Finally, copy the contents of `index.txt` and paste them into the `<keywords>` section of the `qhp` file.
# script to parse Concept-Index.html and collect all the keywords into index.txt
from bs4 import BeautifulSoup
def is_ascii(s):
    """Return True when every character of ``s`` has a code point below 128.

    An empty string is considered ASCII.
    """
    for ch in s:
        if ord(ch) >= 128:
            # First non-ASCII character settles the answer.
            return False
    return True
# Local import: needed to XML-escape the attribute values written below.
from xml.sax.saxutils import escape

# escape() already handles '&', '<' and '>'; a double quote must also be
# escaped because the values are written inside double-quoted attributes.
_ATTR_ENTITIES = {'"': '&quot;'}

# Read the whole index page; the context manager closes the file even if
# reading raises.
with open('./Concept-Index.html', 'r') as fp:
    html = fp.read()

soup = BeautifulSoup(html, features='html.parser')

# index.txt is opened in append mode, so entries accumulate across runs.
with open('./index.txt', 'a') as fp2:
    for row in soup.find_all('tr'):
        descendants = row.findChildren()
        # The second descendant tag of the row is expected to wrap the
        # index link; rows with fewer tags cannot hold an entry.
        if len(descendants) < 2:
            continue
        link = descendants[1].findChild()
        # Skip rows whose cell has no nested tag or whose nested tag is
        # not a link carrying an href.
        if link is None or not link.has_attr('href'):
            continue
        # Only pure-ASCII keywords are exported.
        if not is_ascii(link.text):
            continue
        fp2.write('<keyword name = "{}" ref = "{}"/>\n'.format(
            escape(link.text, _ATTR_ENTITIES),
            escape(link['href'], _ATTR_ENTITIES)))
# script to parse Function-Index.html and collect all the keywords into index.txt
from bs4 import BeautifulSoup
# Local import: needed to XML-escape the attribute values written below.
from xml.sax.saxutils import escape

# escape() already handles '&', '<' and '>'; a double quote must also be
# escaped because the values are written inside double-quoted attributes.
_ATTR_ENTITIES = {'"': '&quot;'}

# Read the whole index page; the context manager closes the file even if
# reading raises.
with open('./Function-Index.html', 'r') as fp:
    html = fp.read()

soup = BeautifulSoup(html, features='html.parser')

# index.txt is opened in append mode, so entries accumulate across runs.
with open('./index.txt', 'a') as fp2:
    for code_tag in soup.find_all('code'):
        index = code_tag.text
        ref = code_tag.find_parent('a')
        # A <code> element that is not inside a link (or whose link has no
        # href) has no target to reference, so it is skipped instead of
        # crashing the whole run.
        if ref is None or not ref.has_attr('href'):
            continue
        fp2.write('<keyword name = "{}" ref = "{}"/>\n'.format(
            escape(index, _ATTR_ENTITIES),
            escape(ref['href'], _ATTR_ENTITIES)))
# script to parse Operator-Index.html and collect all the keywords into index.txt
from bs4 import BeautifulSoup
def is_ascii(s):
    """Tell whether ``s`` consists solely of characters with code points 0-127.

    Returns True for the empty string.
    """
    has_non_ascii = any(ord(ch) > 127 for ch in s)
    return not has_non_ascii
# Local import: needed to XML-escape the attribute values written below.
from xml.sax.saxutils import escape

# Operator names routinely contain '&', '<', '>' and '"'. escape() covers
# the first three; the double quote is added because the values are written
# inside double-quoted attributes.
_ATTR_ENTITIES = {'"': '&quot;'}

# Read the whole index page; the context manager closes the file even if
# reading raises.
with open('./Operator-Index.html', 'r') as fp:
    html = fp.read()

soup = BeautifulSoup(html, features='html.parser')

# index.txt is opened in append mode, so entries accumulate across runs.
with open('./index.txt', 'a') as fp2:
    for row in soup.find_all('tr'):
        descendants = row.findChildren()
        # The second descendant tag of the row is expected to wrap the
        # index link; rows with fewer tags cannot hold an entry.
        if len(descendants) < 2:
            continue
        link = descendants[1].findChild()
        # Skip rows whose cell has no nested tag or whose nested tag is
        # not a link carrying an href.
        if link is None or not link.has_attr('href'):
            continue
        # Only pure-ASCII keywords are exported.
        if not is_ascii(link.text):
            continue
        fp2.write('<keyword name = "{}" ref = "{}"/>\n'.format(
            escape(link.text, _ATTR_ENTITIES),
            escape(link['href'], _ATTR_ENTITIES)))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment