Skip to content

Commit

Permalink
change OUTPUT_DIR to /tmp
Browse files (browse the repository at this point in the history)
  • Loading branch information
themousepotato committed Feb 26, 2021
1 parent 4100117 commit 4fdfb9a
Show file tree
Hide file tree
Showing 27 changed files with 36 additions and 37 deletions.
2 changes: 1 addition & 1 deletion unscrapulous/arbitration-awards-bse.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www.bseindia.com/investors/ArbitAwards.aspx'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'arbitration-awards-bse.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/arbitration-awards-nse.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www1.nseindia.com/invest/dynaContent/arbitration_award.jsp?requestPage=main&qryFlag=yes'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'arbitration-awards-nse.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/bse-defaulter-and-expelled-members.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www.bseindia.com/static/members/List_defaulters_Expelled_members.aspx'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'bse-defaulter-and-expelled-members.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/icex-defaulter-members.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www.icexindia.com/membership/expelled-defaulter-surrendered-members'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'icex-defaulter-members.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/icex-expelled-members.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www.icexindia.com/membership/expelled-defaulter-surrendered-members'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'icex-expelled-members.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/income-tax-defaulters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

SOURCE = 'http://office.incometaxindia.gov.in/administration/_layouts/15/inplview.aspx?List={5A26177B-D7A0-4251-843D-5E6C0B3C3DF2}&View={D8DD9754-8FD1-4D72-9908-727646E99CA0}&ViewCount=450&IsXslView=TRUE&IsCSR=TRUE&Paged=TRUE&p_ID='
FILE_URL = 'http://office.incometaxindia.gov.in/administration/Lists/Tax%20Defaulters/AllItems.aspx'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'income-tax-defaulters.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/irda-blacklisted.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import datetime

SOURCE = 'https://agencyportal.irdai.gov.in/PublicAccess/BlackListedAgent.aspx'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'irda-blacklisted.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/mca-company-defaulter-list.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

PARENT_SOURCE = 'http://www.mca.gov.in'
SOURCE = 'http://www.mca.gov.in/MinistryV2/defaultercompanieslist.html'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'mca-company-defaulter-list.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/mca-director-defaulter-list.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

PARENT_SOURCE = 'http://www.mca.gov.in'
SOURCE = 'http://www.mca.gov.in/MinistryV2/defaulterdirectorslist.html'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'mca-director-defaulter-list.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/mca-director-disqualified-list.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

PARENT_SOURCE = 'http://www.mca.gov.in'
SOURCE = 'http://www.mca.gov.in/MinistryV2/disqualifieddirectorslist.html'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'mca-director-disqualified-list.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/mca-proclaimed-offenders-ind.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

PARENT_SOURCE = 'http://www.mca.gov.in'
SOURCE = 'http://www.mca.gov.in/MinistryV2/proclaimedoffenders.html'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'mca-proclaimed-offenders-ind.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/mcx-action-ap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

PARENT_SOURCE = 'https://www.mcxindia.com'
SOURCE = 'https://www.mcxindia.com/membership/notice-board/notice-board-disciplinary-action'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'mcx-action-ap.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/mcx-defaulter-members.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

PARENT_SOURCE = 'https://www.mcxindia.com'
SOURCE = 'https://www.mcxindia.com/membership/notice-board/notice-board-disciplinary-action'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'mcx-defaulter-members.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/mcx-secretaries-defaulter-list.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www.mcxindia.com/Investor-Services/defaulters/defaulters-list'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'mcx-secretaries-defaulter-list.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/mse-arbitral-awards.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www.msei.in/investors/list-of-arbitrators'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'sfio-convicted.csv'

def main():
Expand Down
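2 changes: 1 addition & 1 deletion unscrapulous/ncdex-suspended-defaulted-expelled-debarred-members.py
Original file line number Diff line number Diff line change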
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://ncdex.com/suspended_member/latest_info'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'ncdex-suspended-defaulted-expelled-debarred-members.csv'

FORM_DATA = {
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/nse-defaulted-members.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

SOURCE = 'https://www1.nseindia.com/invest/json/def_members.json'
FILE_PARENT_URL = 'https://www1.nseindia.com/invest/resources/download/'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'nse-defaulted-members.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/nse-expelled-members.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

SOURCE = 'https://www1.nseindia.com/invest/json/exp_members.json'
FILE_PARENT_URL = 'https://www1.nseindia.com/invest/resources/download/'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'nse-expelled-members.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/nse-regulatory-defaulting-clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www.nseindia.com/regulations/exchange-defaulting-clients'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'nse-regulatory-defaulting-clients.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/sebi-debarred-bse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

PARENT_SOURCES = ['https://www.bseindia.com', 'https://www.bseindia.com/investors/']
SOURCE = 'https://www.bseindia.com/investors/debent.aspx'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'sebi-debarred-bse.csv'

def main():
Expand Down
15 changes: 6 additions & 9 deletions unscrapulous/sebi-debarred-nse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,28 @@
#-*- coding: utf-8 -*-

from utils import *
import os
import shutil

SOURCE = 'https://www.nseindia.com/regulations/member-sebi-debarred-entities'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files/sebi-debarred-nse')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'sebi-debarred-nse.csv'

def main():
create_dir(OUTPUT_DIR)
soup = get_soup(SOURCE)
file_url = soup.find('a', {'class' : 'file--mime-application-vnd-ms-excel'})['href']
file_sources = download_files([file_url], OUTPUT_DIR)
filenames = list(file_sources.keys())
filenames = list(download_files([file_url], OUTPUT_DIR).keys())
convert_into_csv(filenames=filenames, output_dir=OUTPUT_DIR, ext='xls')
delete_files(filenames)

filename = os.path.join(OUTPUT_DIR, filenames[0].replace('xls', 'csv'))
out_filename = os.path.join(os.getcwd(), 'files', OUTPUT_FILE)
shutil.move(filename, out_filename)
out_filename = os.path.join(OUTPUT_DIR, OUTPUT_FILE)
os.rename(filename, out_filename)
alias = {
'PAN': 'PAN',
'Name': 'Entity / Individual Name',
'AddedDate': 'Order Date'
}
write_global_csv(filename=out_filename, source=SOURCE, alias=alias, fillna=True)
shutil.rmtree(OUTPUT_DIR)

if __name__ == '__main__':
main()
main()
2 changes: 1 addition & 1 deletion unscrapulous/sfio-convicted.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://sfio.nic.in/'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'sfio-convicted.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/sfio-proclaimed-offenders.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://sfio.nic.in/'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'sfio-proclaimed-offenders.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/unsc-1988.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://scsanctions.un.org/taliban/'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'unsc-1988-list.csv'

def main():
Expand Down
2 changes: 1 addition & 1 deletion unscrapulous/unsc-consolidated-list.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'https://www.un.org/securitycouncil/content/un-sc-consolidated-list'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'unsc-consolidated-list.csv'

def main():
Expand Down
6 changes: 4 additions & 2 deletions unscrapulous/unscrapulous.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import argparse
import toml

OUTPUT_DIR = '/tmp/unscrapulous/files'

def main():
parser = argparse.ArgumentParser()
parser.add_argument('--config', help='Path to the config file', default='config.toml')
Expand All @@ -24,8 +26,8 @@ def main():
for filename in filenames:
call(['python', filename + '.py'])

csv_files = [os.path.join('files', f) for f in os.listdir('files') if f.endswith('.csv')]
csv_files = [os.path.join(OUTPUT_DIR, f) for f in os.listdir(OUTPUT_DIR) if f.endswith('.csv')]
merge_csvs(filenames=csv_files, output_filename=output_filename, delete=True)

if __name__ == '__main__':
main()
main()
4 changes: 2 additions & 2 deletions unscrapulous/wildlife-crime-convicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from utils import *

SOURCE = 'http://wccb.gov.in/Content/Convicts.aspx'
OUTPUT_DIR = os.path.join(os.getcwd(), 'files')
OUTPUT_DIR = '/tmp/unscrapulous/files'
OUTPUT_FILE = 'wildlife-crime-convicts.csv'

def main():
Expand All @@ -19,4 +19,4 @@ def main():
write_global_csv(filename=os.path.join(OUTPUT_DIR, OUTPUT_FILE), source=SOURCE, alias=alias)

if __name__ == '__main__':
main()
main()

0 comments on commit 4fdfb9a

Please sign in to comment.