Added SINGLE_PROCESS environment variable
This commit is contained in:
		
							parent
							
								
									b310ca2fc2
								
							
						
					
					
						commit
						2bcf6cbbf7
					
				
					 4 changed files with 8 additions and 3 deletions
				
			
		|  | @ -2,3 +2,4 @@ RAPID7_API_KEY= | |||
| CACHE_SIZE=536870912 | ||||
| MASSDNS_HASHMAP_SIZE=1000 | ||||
| PROFILE=0 | ||||
| SINGLE_PROCESS=0 | ||||
|  |  | |||
|  | @ -49,7 +49,9 @@ Depending on the sources you'll be using to generate the list, you'll need to in | |||
| 
 | ||||
| The so-called database (in the form of `blocking.p`) is a file storing all the matching entities (ASN, IPs, hostnames, zones…) and every entity leading to it. | ||||
| It exists because the list cannot be generated in one pass, as DNS redirection chain links do not have to be input in order. | ||||
| 
 | ||||
| You can purge old records from the database by running `./prune.sh`. | ||||
| When you remove a source of data, remove its corresponding file in `last_updates` to fix the pruning process. | ||||
| 
 | ||||
| ### Gather external sources | ||||
| 
 | ||||
|  | @ -143,7 +145,7 @@ This will download about 35 GiB of data the first time, but only the matching re | |||
| Note the download speed will most likely be limited by the database operation throughput (fast RAM will help). | ||||
| 
 | ||||
| The script remembers which were the last sets downloaded, and will only import newer sets. | ||||
| If you want to force re-importing, run `rm temp/rapid7_timestamps/sonar.*`. | ||||
| If you want to force re-importing, run `rm last_updates/rapid7_*.txt`. | ||||
| 
 | ||||
| ### Export the lists | ||||
| 
 | ||||
|  |  | |||
|  | @ -53,7 +53,8 @@ function feed_rapid7 { # study, dataset | |||
|     then | ||||
|         link="$(get_download_url $study $dataset)" | ||||
|         log "Reading $dataset dataset from $link ($old_ts -> $new_ts)…" | ||||
|         # curl -L "$link" | gunzip | ./feed_dns.py rapid7 $@ | ||||
|         [ $SINGLE_PROCESS -eq 1 ] && EXTRA_ARGS="--single-process" | ||||
|         curl -L "$link" | gunzip | ./feed_dns.py rapid7 $@ $EXTRA_ARGS | ||||
|         if [ $? -eq 0 ] | ||||
|         then | ||||
|             echo $new_ts > $old_ts_file | ||||
|  |  | |||
|  | @ -20,4 +20,5 @@ date +%s > "last_updates/massdns.txt" | |||
| massdns --output Snrql --retry REFUSED,SERVFAIL --hashmap-size "$MASSDNS_HASHMAP_SIZE" --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list | ||||
| 
 | ||||
| log "Importing into database…" | ||||
| pv temp/all_resolved.txt | ./feed_dns.py massdns | ||||
| [ $SINGLE_PROCESS -eq 1 ] && EXTRA_ARGS="--single-process" | ||||
| pv temp/all_resolved.txt | ./feed_dns.py massdns --ip4-cache "$CACHE_SIZE" $EXTRA_ARGS | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue