{ "cells": [
 { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "D:\\PATSTAT\n", "D:\\PATSTAT\n" ] } ], "source": [
  "# Setup: all imports plus the machine-specific paths used by the notebook.\n",
  "import os\n",
  "\n",
  "import dask\n",
  "import dask.dataframe as dd\n",
  "import pandas as pd\n",
  "\n",
  "# Folder with the PATSTAT table exports; the relative table paths in later cells resolve against it.\n",
  "workdir_path=r\"D:\\PATSTAT\"\n",
  "# Project folder that holds the hand-made extracts (raw_files_csv, first_round).\n",
  "project_dir = r\"C:/Users/radvanyi/PycharmProjects/ZSI_analytics/PATSTAT\"\n",
  "\n",
  "# Point Dask's temporary directory at the data drive (set once; the dict form is enough).\n",
  "dask.config.set({'temporary_directory': r'D:\\PATSTAT\\dask_temp'})\n",
  "\n",
  "print(os.getcwd())  # working directory before the switch\n",
  "os.chdir(workdir_path)\n",
  "print(os.getcwd())  # working directory after the switch" ] },
 { "cell_type": "code", "execution_count": 2, "outputs": [], "source": [
  "# Person data (tls_206): CSV export -> Parquet dataset -> lazy Dask frame.\n",
  "tls_206 = dd.read_csv(\"table_tls206.csv\", low_memory=False)\n",
  "# The conversion scans the whole CSV, so it is skipped when the Parquet dataset already exists.\n",
  "# Delete the tls_206.parquet folder to force a rebuild (new export, or an interrupted earlier write).\n",
  "if not os.path.exists(\"tls_206.parquet\"):\n",
  "    tls_206.to_parquet(\"tls_206.parquet\")\n",
  "tls_206_p = dd.read_parquet(\"tls_206.parquet\")" ], "metadata": { "collapsed": false } },
 { "cell_type": "code", "execution_count": 10, "outputs": [], "source": [
  "# Application-level data (tls_209): CSV export -> Parquet dataset -> lazy Dask frame.\n",
  "tls_209 = dd.read_csv(\"table_tls209.csv\", low_memory=False)\n",
  "# Same guard as for tls_206: convert only when the Parquet dataset is missing.\n",
  "# Delete the tls_209.parquet folder to force a rebuild.\n",
  "if not os.path.exists(\"tls_209.parquet\"):\n",
  "    tls_209.to_parquet(\"tls_209.parquet\")\n",
  "tls_209_p = dd.read_parquet(\"tls_209.parquet\")" ], "metadata": { "collapsed": false } },
 { "cell_type": "code", "execution_count": 3, "outputs": [], "source": [
  "# Disabled alternative loader for appln_pers_f (header-less first_round file); the active loader is in the next cell.\n",
  "# NOTE(review): the column label \" invt_seq_nr\" below carries a leading space - fix it before re-enabling.\n",
  "# import pandas as pd\n",
  "# appln_pers_f = pd.read_csv(r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\\first_round\\first-filings-with-persons-raw.csv\", header=None,\n",
  "# names=[\"appln_id\",\"appln_auth\",\"person_id\",\" invt_seq_nr\",\"applt_seq_nr\",'person_name',\"person_ctry_code\"])" ], "metadata": { "collapsed": false } },
 { "cell_type": "code", "execution_count": 4, "outputs": [], "source": [
  "# Sub-folder of project_dir that holds the raw CSV extracts.\n",
  "outdir=\"raw_files_csv\"\n",
  "# Persons extract, loaded fully into pandas; resolves to the same path as before:\n",
  "# C:/Users/radvanyi/PycharmProjects/ZSI_analytics/PATSTAT/raw_files_csv/02_persons_2011_2022.csv\n",
  "appln_pers_f = pd.read_csv(f\"{project_dir}/{outdir}/02_persons_2011_2022.csv\", low_memory=False)" ], "metadata": { "collapsed": false } },
{ "cell_type": "code", "execution_count": 5, "outputs": [], "source": [
  "# Distinct person ids present in the persons extract; used below to filter tls_206.\n",
  "pers_id_scope = appln_pers_f[\"person_id\"].unique()" ], "metadata": { "collapsed": false } },
 { "cell_type": "code", "execution_count": 6, "outputs": [], "source": [
  "# Folder that receives the scoped extracts written by this notebook.\n",
  "first_round_dir = r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\\first_round\"\n",
  "\n",
  "# Lazy Dask filter: tls_206 rows whose person_id occurs in the persons extract.\n",
  "tls_206_scope = tls_206_p[tls_206_p['person_id'].isin(pers_id_scope)]\n",
  "# Materialise once; the same pandas frame feeds the CSV export and the later cells,\n",
  "# so the Dask computation is not run a second time just to obtain df_206.\n",
  "df_206 = tls_206_scope.compute()\n",
  "df_206.to_csv(os.path.join(first_round_dir, \"tls_206_scope_v2.csv\"), index=False)" ], "metadata": { "collapsed": false } },
 { "cell_type": "code", "execution_count": 12, "outputs": [], "source": [
  "# Distinct application ids present in the persons extract; used below to filter tls_209.\n",
  "appln_id_scope = appln_pers_f[\"appln_id\"].unique()" ], "metadata": { "collapsed": false } },
 { "cell_type": "code", "execution_count": 13, "outputs": [ { "data": { "text/plain": "12646904" }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(appln_id_scope)" ], "metadata": { "collapsed": false } },
 { "cell_type": "code", "execution_count": 14, "outputs": [], "source": [
  "# Lazy Dask filter: tls_209 rows whose appln_id occurs in the persons extract.\n",
  "tls_209_p_scope =tls_209_p[tls_209_p['appln_id'].isin(appln_id_scope)]\n",
  "# .compute() pulls the whole filtered table into memory before it is written as one CSV.\n",
  "tls_209_p_scope.compute().to_csv(os.path.join(first_round_dir, \"tls_209_IPC_scope.csv\"), index=False)" ], "metadata": { "collapsed": false } },
 { "cell_type": "code", "execution_count": 8, "outputs": [ { "data": { "text/plain": " person_id person_name \n4023 4025 Meritor Heavy Vehicle Braking Systems (UK) Lim... \\\n4347 4349 Fraser, Stuart \n5627 5629 Xaar Technology Limited \n5811 5813 SIEMENS PLC \n6499 6501 BAE Systems PLC \n... ... ... \n366118 88823960 WARD, Lauren \n366119 88823961 WÃRTSILÃ UK LIMITED \n366130 88823972 Xavier Erdödy \n366135 88823977 Zeg.AI Ltd \n366137 88823979 Zhiyang Pan \n\n person_name_orig_lg \n4023 Meritor Heavy Vehicle Braking Systems (UK) Lim... 
\\\n4347 Fraser, Stuart \n5627 Xaar Technology Limited \n5811 SIEMENS PLC \n6499 BAE Systems PLC \n... ... \n366118 WARD, Lauren \n366119 WÃRTSILÃ UK LIMITED \n366130 Xavier Erdödy \n366135 Zeg.AI Ltd \n366137 Zhiyang Pan \n\n person_address person_ctry_code \n4023 Grange Road Cwmbran,Gwent NP44 3XU GB \\\n4347 Fernhill Lees Lane,Little Neston Cheshire CH64... GB \n5627 Unit 316, Science Park,Cambridge CB4 0XR GB \n5811 Faraday House Sir William Siemens Square Friml... GB \n6499 6 Carlton Gardens,London SW1Y 5AD GB \n... ... ... \n366118 None GB \n366119 None GB \n366130 None GB \n366135 None GB \n366137 None GB \n\n nuts nuts_level doc_std_name_id \n4023 UKL16 3 25273975 \\\n4347 UKD63 3 3738 \n5627 UKH12 3 4824 \n5811 UKJ25 3 4979 \n6499 UKI32 3 5583 \n... ... ... ... \n366118 UK 0 40301088 \n366119 UK 0 21929085 \n366130 UK 0 40578262 \n366135 UK 0 37017676 \n366137 UK 0 17409767 \n\n doc_std_name psn_id \n4023 MERITOR HEAVY VEHICLE BRAKING SYSTEMS UK LTD 21718818 \\\n4347 FRASER STUART 9243356 \n5627 XAAR TECHNOLOGY LTD 35706185 \n5811 SIEMENS PLC 30138991 \n6499 BAE SYSTEMS PLC 1787059 \n... ... ... \n366118 WARD LAUREN 188823960 \n366119 RTSIL UK LTD W 188823961 \n366130 XAVIER ERDÖDY 188823972 \n366135 ZEG AI LTD 188823977 \n366137 ZHIYANG PAN 188823979 \n\n psn_name psn_level psn_sector \n4023 MERITOR HEAVY VEHICLE BRAKING SYSTEMS (UK) 1 COMPANY \\\n4347 FRASER, STUART 0 None \n5627 XAAR TECHNOLOGY 1 COMPANY \n5811 SIEMENS 2 COMPANY \n6499 BAE SYSTEMS 2 COMPANY \n... ... ... ... \n366118 WARD, Lauren 0 UNKNOWN \n366119 WÃRTSILÃ UK LIMITED 0 UNKNOWN \n366130 Xavier Erdödy 0 UNKNOWN \n366135 Zeg.AI Ltd 0 UNKNOWN \n366137 Zhiyang Pan 0 UNKNOWN \n\n han_id han_name \n4023 1940089 MERITOR HEAVY VEHICLE BRAKING SYSTEMS UK LTD \\\n4347 100004349 Fraser, Stuart \n5627 3228426 XAAR TECH LTD \n5811 2755905 SIEMENS PLC \n6499 208539 BAE SYSTEMS PLC \n... ... ... 
\n366118 188823960 WARD, Lauren \n366119 188823961 WÃRTSILÃ UK LIMITED \n366130 188823972 Xavier Erdödy \n366135 188823977 Zeg.AI Ltd \n366137 188823979 Zhiyang Pan \n\n han_harmonized \n4023 2 \n4347 0 \n5627 2 \n5811 2 \n6499 2 \n... ... \n366118 0 \n366119 0 \n366130 0 \n366135 0 \n366137 0 \n\n[77303 rows x 16 columns]", "text/html": "
\n | person_id | \nperson_name | \nperson_name_orig_lg | \nperson_address | \nperson_ctry_code | \nnuts | \nnuts_level | \ndoc_std_name_id | \ndoc_std_name | \npsn_id | \npsn_name | \npsn_level | \npsn_sector | \nhan_id | \nhan_name | \nhan_harmonized | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4023 | \n4025 | \nMeritor Heavy Vehicle Braking Systems (UK) Lim... | \nMeritor Heavy Vehicle Braking Systems (UK) Lim... | \nGrange Road Cwmbran,Gwent NP44 3XU | \nGB | \nUKL16 | \n3 | \n25273975 | \nMERITOR HEAVY VEHICLE BRAKING SYSTEMS UK LTD | \n21718818 | \nMERITOR HEAVY VEHICLE BRAKING SYSTEMS (UK) | \n1 | \nCOMPANY | \n1940089 | \nMERITOR HEAVY VEHICLE BRAKING SYSTEMS UK LTD | \n2 | \n
4347 | \n4349 | \nFraser, Stuart | \nFraser, Stuart | \nFernhill Lees Lane,Little Neston Cheshire CH64... | \nGB | \nUKD63 | \n3 | \n3738 | \nFRASER STUART | \n9243356 | \nFRASER, STUART | \n0 | \nNone | \n100004349 | \nFraser, Stuart | \n0 | \n
5627 | \n5629 | \nXaar Technology Limited | \nXaar Technology Limited | \nUnit 316, Science Park,Cambridge CB4 0XR | \nGB | \nUKH12 | \n3 | \n4824 | \nXAAR TECHNOLOGY LTD | \n35706185 | \nXAAR TECHNOLOGY | \n1 | \nCOMPANY | \n3228426 | \nXAAR TECH LTD | \n2 | \n
5811 | \n5813 | \nSIEMENS PLC | \nSIEMENS PLC | \nFaraday House Sir William Siemens Square Friml... | \nGB | \nUKJ25 | \n3 | \n4979 | \nSIEMENS PLC | \n30138991 | \nSIEMENS | \n2 | \nCOMPANY | \n2755905 | \nSIEMENS PLC | \n2 | \n
6499 | \n6501 | \nBAE Systems PLC | \nBAE Systems PLC | \n6 Carlton Gardens,London SW1Y 5AD | \nGB | \nUKI32 | \n3 | \n5583 | \nBAE SYSTEMS PLC | \n1787059 | \nBAE SYSTEMS | \n2 | \nCOMPANY | \n208539 | \nBAE SYSTEMS PLC | \n2 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
366118 | \n88823960 | \nWARD, Lauren | \nWARD, Lauren | \nNone | \nGB | \nUK | \n0 | \n40301088 | \nWARD LAUREN | \n188823960 | \nWARD, Lauren | \n0 | \nUNKNOWN | \n188823960 | \nWARD, Lauren | \n0 | \n
366119 | \n88823961 | \nWÃRTSILÃ UK LIMITED | \nWÃRTSILÃ UK LIMITED | \nNone | \nGB | \nUK | \n0 | \n21929085 | \nRTSIL UK LTD W | \n188823961 | \nWÃRTSILÃ UK LIMITED | \n0 | \nUNKNOWN | \n188823961 | \nWÃRTSILÃ UK LIMITED | \n0 | \n
366130 | \n88823972 | \nXavier Erdödy | \nXavier Erdödy | \nNone | \nGB | \nUK | \n0 | \n40578262 | \nXAVIER ERDÖDY | \n188823972 | \nXavier Erdödy | \n0 | \nUNKNOWN | \n188823972 | \nXavier Erdödy | \n0 | \n
366135 | \n88823977 | \nZeg.AI Ltd | \nZeg.AI Ltd | \nNone | \nGB | \nUK | \n0 | \n37017676 | \nZEG AI LTD | \n188823977 | \nZeg.AI Ltd | \n0 | \nUNKNOWN | \n188823977 | \nZeg.AI Ltd | \n0 | \n
366137 | \n88823979 | \nZhiyang Pan | \nZhiyang Pan | \nNone | \nGB | \nUK | \n0 | \n17409767 | \nZHIYANG PAN | \n188823979 | \nZhiyang Pan | \n0 | \nUNKNOWN | \n188823979 | \nZhiyang Pan | \n0 | \n
77303 rows × 16 columns
\n