Skip to content

Commit

Permalink
Merge pull request #42 from miha42-github/V3.0.1
Browse files Browse the repository at this point in the history
README cleanup, fixes for bad returns from SIC and Wikipedia.
  • Loading branch information
miha42-github authored Apr 20, 2024
2 parents 5d765e6 + c08289b commit 4e17aa8
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 28 deletions.
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,18 @@ We try to keep high level Todos and Improvements in a list contained in a sectio

### Future work/Todos
Here are the things that are likely to be worked on, but without any strict deadline:
4. Determine if feasible to talk to the companies house API for gathering data from the UK
5. Research other pools of public data which can serve to enrich
6. Evaluate if financial data can be added from EDGAR, Wikipedia and Companies House
8. Provide instructions/details for running on a Pi or Arm based system, see Lagniappe below

1. Determine if it is feasible to talk to the Companies House API for gathering data from the UK
1. Initial feasibility has been checked, but the value of the data is still being evaluated
2. Research other pools of public data which can serve to enrich
1. There are additional data pools including NAICS and UK SIC codes which could be added. Additional Industry Code data sources by country are likely a first target to add. The deeper question is how to merge these data sources for a kind of universal classification.
3. Evaluate if financial data can be added from EDGAR, Wikipedia and Companies House
4. Provide instructions/details for running on a Pi or Arm based system
1. Since one of the target docker images is for ARM, the next logical step is to provide instructions for running on a Pi.

### The Lagniappe
Run on a Raspberry Pi: To be reauthored


# License
Since this code falls under a liberal Apache-V2 license it is provided as is, without warranty or guarantee of support. Feel free to fork the code, but please provide attribution to the authors.

Expand Down
10 changes: 3 additions & 7 deletions company_dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ async def general_query(request):
# Log the query request as a debug message
logger.debug(f'Performing general query for company name: [{request.path_params["company_name"]}]')
company_wiki_data = gq.get_firmographics_wikipedia()
# logger.debug(f'Company wiki data: {company_wiki_data}')
if company_wiki_data['code'] != 200:
logger.error(f'There were [0] results for resource [company_name].')
return JSONResponse(company_wiki_data)
Expand Down Expand Up @@ -96,7 +97,7 @@ def _check_status_and_return(result_data, resource_name):
return result_data

def _prepare_logging(log_level=logging.INFO):
logging.basicConfig(format='%(levelname)s:\t%(asctime)s [module: %(name)s] %(message)s', level=logging.DEBUG)
logging.basicConfig(format='%(levelname)s:\t%(asctime)s [module: %(name)s] %(message)s', level=log_level)
return logging.getLogger(__file__)

def _handle_request(request, handler, func, path_param, *args, **kwargs):
Expand Down Expand Up @@ -191,12 +192,7 @@ async def dispatch(self, request, call_next):
# -------------------------------------------------------------- #

# Serve the local directory ./html at the /help
Mount('/help', app=StaticFiles(directory='html', html=True)),

# Catch-all route which redirects to /help
# Route("/{path:path}", endpoint=lambda _: RedirectResponse(url='/help'), methods=["GET"]),


Mount('/help', app=StaticFiles(directory='html', html=True)),
])
# END: Define the Starlette app
# -------------------------------------------------------------- #
Expand Down
2 changes: 1 addition & 1 deletion lib/sic.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def get_all_sic_by_name(self):
# Issue the query
for row in self.ec.execute(sql_query):

self.logger.debug('Processing row [' + row + ']')
self.logger.debug(f'Processing row [{row}]')

# Get the fields in a structure we can manipulate
sic_code = str(row[SICS])
Expand Down
32 changes: 17 additions & 15 deletions lib/wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,28 +105,30 @@ def get_firmographics(self):
}

# TODO try to do the right thing by trying different common combinations like Company, Inc.; Company Corp, etc.
try:
# Log the start of this process including self.query
self.logger.info('Starting retrieval of firmographics for [' + self.query + '] via its wikipedia page.')
company_page = wptools.page(self.query, silent=True)
# Log the completion of the page creation
self.debug(f'Page results for [{self.query}]: {company_page}')
except:
# Log the start of this process including self.query
self.logger.info('Starting retrieval of firmographics for [' + self.query + '] via its wikipedia page.')
company_page = wptools.page(self.query, silent=True)
if not company_page:
self.logger.error('A wikipedia page for [' + self.query + '] was not found.')
return lookup_error
# Log the completion of the page creation
self.logger.debug(f'Page results for [{self.query}]: {company_page}')

# Prepare to get the infobox for the company
try:
# Log the start of the process to get the infobox for the company
self.logger.info('Starting process to retrieve infobox for [' + self.query + '].')
parse_results = company_page.get_parse(show=False)
# Log the completion of the infobox creation
self.logger.info('Completed infobox retrieval for [' + self.query + '].')
except:
# Log the start of the process to get the infobox for the company
self.logger.info('Starting process to retrieve infobox for [' + self.query + '].')
parse_results = company_page.get_parse(show=False)
if not parse_results.data['infobox']:
self.logger.error('An infobox for [' + self.query + '] was not found.')
return lookup_error
# Log the completion of the infobox creation
self.logger.info('Completed infobox retrieval for [' + self.query + '].')

company_info = parse_results.data['infobox']
if not company_info: return lookup_error
if not company_info:
self.logger.error('An infobox for [' + self.query + '] was not found.')
return lookup_error
self.logger.info('Completed infobox parse for [' + self.query + '].')

# Obtain the query results
try:
Expand Down

0 comments on commit 4e17aa8

Please sign in to comment.